Merge branch 'main' into feature/support_encrypted_files_20251209

* main:
  Docs: Enhance API reference for file management (#11827)
  fix: prevent redundant retries in async_chat_streamly upon success (#11832)
  Bump infinity to v0.6.11. Requires python>=3.11 (#11814)
This commit is contained in:
virgilwong 2025-12-09 17:38:05 +08:00
commit d1483a80b2
27 changed files with 2652 additions and 1077 deletions

1
.github/copilot-instructions.md vendored Normal file
View file

@ -0,0 +1 @@
Refer to [AGENTS.md](../AGENTS.md) for all repo instructions.

View file

@ -3,11 +3,12 @@ name: release
on:
schedule:
- cron: '0 13 * * *' # Runs every day at 13:00:00Z (21:00:00+08:00)
# https://github.com/orgs/community/discussions/26286?utm_source=chatgpt.com#discussioncomment-3251208
# "The create event does not support branch filter and tag filter."
# The "create tags" trigger is specifically focused on the creation of new tags, while the "push tags" trigger is activated when tags are pushed, including both new tag creations and updates to existing tags.
create:
push:
tags:
- "v*.*.*" # normal release
- "nightly" # the only one mutable tag
# https://docs.github.com/en/actions/using-jobs/using-concurrency
concurrency:
@ -21,9 +22,9 @@ jobs:
- name: Ensure workspace ownership
run: echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R ${USER} ${GITHUB_WORKSPACE}
# https://github.com/actions/checkout/blob/v3/README.md
# https://github.com/actions/checkout/blob/v6/README.md
- name: Check out code
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
token: ${{ secrets.GITHUB_TOKEN }} # Passed to actions/checkout as its `token` input
fetch-depth: 0
@ -31,12 +32,12 @@ jobs:
- name: Prepare release body
run: |
if [[ ${GITHUB_EVENT_NAME} == "create" ]]; then
if [[ ${GITHUB_EVENT_NAME} != "schedule" ]]; then
RELEASE_TAG=${GITHUB_REF#refs/tags/}
if [[ ${RELEASE_TAG} == "nightly" ]]; then
PRERELEASE=true
else
if [[ ${RELEASE_TAG} == v* ]]; then
PRERELEASE=false
else
PRERELEASE=true
fi
echo "Workflow triggered by create tag: ${RELEASE_TAG}"
else
@ -55,7 +56,7 @@ jobs:
git fetch --tags
if [[ ${GITHUB_EVENT_NAME} == "schedule" ]]; then
# Determine if a given tag exists and matches a specific Git commit.
# actions/checkout@v4 fetch-tags doesn't work when triggered by schedule
# actions/checkout@v6 fetch-tags doesn't work when triggered by schedule
if [ "$(git rev-parse -q --verify "refs/tags/${RELEASE_TAG}")" = "${GITHUB_SHA}" ]; then
echo "mutable tag ${RELEASE_TAG} exists and matches ${GITHUB_SHA}"
else
@ -88,7 +89,7 @@ jobs:
- name: Build and push image
run: |
sudo docker login --username infiniflow --password-stdin <<< ${{ secrets.DOCKERHUB_TOKEN }}
sudo docker build --build-arg NEED_MIRROR=1 -t infiniflow/ragflow:${RELEASE_TAG} -f Dockerfile .
sudo docker build --build-arg NEED_MIRROR=1 --build-arg HTTPS_PROXY=${HTTPS_PROXY} --build-arg HTTP_PROXY=${HTTP_PROXY} -t infiniflow/ragflow:${RELEASE_TAG} -f Dockerfile .
sudo docker tag infiniflow/ragflow:${RELEASE_TAG} infiniflow/ragflow:latest
sudo docker push infiniflow/ragflow:${RELEASE_TAG}
sudo docker push infiniflow/ragflow:latest

View file

@ -34,9 +34,6 @@ jobs:
if: ${{ github.event_name != 'pull_request' || (github.event.pull_request.draft == false && contains(github.event.pull_request.labels.*.name, 'ci')) }}
runs-on: [ "self-hosted", "ragflow-test" ]
steps:
# https://github.com/hmarr/debug-action
#- uses: hmarr/debug-action@v2
- name: Ensure workspace ownership
run: |
echo "Workflow triggered by ${{ github.event_name }}"
@ -44,7 +41,7 @@ jobs:
# https://github.com/actions/checkout/issues/1781
- name: Check out code
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.sha }}
fetch-depth: 0
@ -129,7 +126,7 @@ jobs:
- name: Run unit test
run: |
uv sync --python 3.10 --group test --frozen
uv sync --python 3.11 --group test --frozen
source .venv/bin/activate
which pytest || echo "pytest not in PATH"
echo "Start to run unit test"
@ -141,7 +138,7 @@ jobs:
RAGFLOW_IMAGE=infiniflow/ragflow:${GITHUB_RUN_ID}
echo "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}" >> ${GITHUB_ENV}
sudo docker pull ubuntu:22.04
sudo DOCKER_BUILDKIT=1 docker build --build-arg NEED_MIRROR=1 -f Dockerfile -t ${RAGFLOW_IMAGE} .
sudo DOCKER_BUILDKIT=1 docker build --build-arg NEED_MIRROR=1 --build-arg HTTPS_PROXY=${HTTPS_PROXY} --build-arg HTTP_PROXY=${HTTP_PROXY} -f Dockerfile -t ${RAGFLOW_IMAGE} .
if [[ ${GITHUB_EVENT_NAME} == "schedule" ]]; then
export HTTP_API_TEST_LEVEL=p3
else
@ -201,7 +198,7 @@ jobs:
echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV}
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d
uv sync --python 3.10 --only-group test --no-default-groups --frozen && uv pip install sdk/python --group test
uv sync --python 3.11 --only-group test --no-default-groups --frozen && uv pip install sdk/python --group test
- name: Run sdk tests against Elasticsearch
run: |

110
AGENTS.md Normal file
View file

@ -0,0 +1,110 @@
# RAGFlow Project Instructions for GitHub Copilot
This file provides context, build instructions, and coding standards for the RAGFlow project.
It is structured to follow GitHub Copilot's [customization guidelines](https://docs.github.com/en/copilot/concepts/prompting/response-customization).
## 1. Project Overview
RAGFlow is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It is a full-stack application with a Python backend and a React/TypeScript frontend.
- **Backend**: Python 3.11+ (Flask/Quart)
- **Frontend**: TypeScript, React, UmiJS
- **Architecture**: Microservices based on Docker.
- `api/`: Backend API server.
- `rag/`: Core RAG logic (indexing, retrieval).
- `deepdoc/`: Document parsing and OCR.
- `web/`: Frontend application.
## 2. Directory Structure
- `api/`: Backend API server (Flask/Quart).
- `apps/`: API Blueprints (Knowledge Base, Chat, etc.).
- `db/`: Database models and services.
- `rag/`: Core RAG logic.
- `llm/`: LLM, Embedding, and Rerank model abstractions.
- `deepdoc/`: Document parsing and OCR modules.
- `agent/`: Agentic reasoning components.
- `web/`: Frontend application (React + UmiJS).
- `docker/`: Docker deployment configurations.
- `sdk/`: Python SDK.
- `test/`: Backend tests.
## 3. Build Instructions
### Backend (Python)
The project uses **uv** for dependency management.
1. **Setup Environment**:
```bash
uv sync --python 3.11 --all-extras
uv run download_deps.py
```
2. **Run Server**:
- **Prerequisite**: Start dependent services (MySQL, ES/Infinity, Redis, MinIO).
```bash
docker compose -f docker/docker-compose-base.yml up -d
```
- **Launch**:
```bash
source .venv/bin/activate
export PYTHONPATH=$(pwd)
bash docker/launch_backend_service.sh
```
### Frontend (TypeScript/React)
Located in `web/`.
1. **Install Dependencies**:
```bash
cd web
npm install
```
2. **Run Dev Server**:
```bash
npm run dev
```
Runs on port 8000 by default.
### Docker Deployment
To run the full stack using Docker:
```bash
cd docker
docker compose -f docker-compose.yml up -d
```
## 4. Testing Instructions
### Backend Tests
- **Run All Tests**:
```bash
uv run pytest
```
- **Run Specific Test**:
```bash
uv run pytest test/test_api.py
```
### Frontend Tests
- **Run Tests**:
```bash
cd web
npm run test
```
## 5. Coding Standards & Guidelines
- **Python Formatting**: Use `ruff` for linting and formatting.
```bash
ruff check
ruff format
```
- **Frontend Linting**:
```bash
cd web
npm run lint
```
- **Pre-commit**: Ensure pre-commit hooks are installed.
```bash
pre-commit install
pre-commit run --all-files
```

View file

@ -45,7 +45,7 @@ RAGFlow is an open-source RAG (Retrieval-Augmented Generation) engine based on d
### Backend Development
```bash
# Install Python dependencies
uv sync --python 3.10 --all-extras
uv sync --python 3.11 --all-extras
uv run download_deps.py
pre-commit install

View file

@ -49,20 +49,24 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
apt install -y libatk-bridge2.0-0 && \
apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
apt install -y libjemalloc-dev && \
apt install -y python3-pip pipx nginx unzip curl wget git vim less && \
apt install -y nginx unzip curl wget git vim less && \
apt install -y ghostscript && \
apt install -y pandoc && \
apt install -y texlive
RUN if [ "$NEED_MIRROR" == "1" ]; then \
pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn; \
# Install uv
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
if [ "$NEED_MIRROR" == "1" ]; then \
mkdir -p /etc/uv && \
echo "[[index]]" > /etc/uv/uv.toml && \
echo 'python-install-mirror = "https://registry.npmmirror.com/-/binary/python-build-standalone/"' > /etc/uv/uv.toml && \
echo '[[index]]' >> /etc/uv/uv.toml && \
echo 'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' >> /etc/uv/uv.toml && \
echo "default = true" >> /etc/uv/uv.toml; \
echo 'default = true' >> /etc/uv/uv.toml; \
fi; \
pipx install uv
tar xzf /deps/uv-x86_64-unknown-linux-gnu.tar.gz \
&& cp uv-x86_64-unknown-linux-gnu/* /usr/local/bin/ \
&& rm -rf uv-x86_64-unknown-linux-gnu \
&& uv python install 3.11
ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
ENV PATH=/root/.local/bin:$PATH
@ -147,7 +151,7 @@ RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
else \
sed -i 's|pypi.tuna.tsinghua.edu.cn|pypi.org|g' uv.lock; \
fi; \
uv sync --python 3.10 --frozen
uv sync --python 3.11 --frozen
COPY web web
COPY docs docs

View file

@ -3,7 +3,7 @@
FROM scratch
# Copy resources downloaded via download_deps.py
COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 libssl*.deb /
COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 libssl*.deb uv-x86_64-unknown-linux-gnu.tar.gz /
COPY nltk_data /nltk_data

View file

@ -314,7 +314,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
```bash
git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
uv sync --python 3.10 # install RAGFlow dependent python modules
uv sync --python 3.11 # install RAGFlow dependent python modules
uv run download_deps.py
pre-commit install
```

View file

@ -288,7 +288,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
```bash
git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
uv sync --python 3.10 # install RAGFlow dependent python modules
uv sync --python 3.11 # install RAGFlow dependent python modules
uv run download_deps.py
pre-commit install
```

View file

@ -288,7 +288,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
```bash
git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
uv sync --python 3.10 # install RAGFlow dependent python modules
uv sync --python 3.11 # install RAGFlow dependent python modules
uv run download_deps.py
pre-commit install
```

View file

@ -283,7 +283,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
```bash
git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
uv sync --python 3.10 # install RAGFlow dependent python modules
uv sync --python 3.11 # install RAGFlow dependent python modules
uv run download_deps.py
pre-commit install
```

View file

@ -305,7 +305,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
```bash
git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
uv sync --python 3.10 # instala os módulos Python dependentes do RAGFlow
uv sync --python 3.11 # instala os módulos Python dependentes do RAGFlow
uv run download_deps.py
pre-commit install
```

View file

@ -315,7 +315,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
```bash
git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
uv sync --python 3.10 # install RAGFlow dependent python modules
uv sync --python 3.11 # install RAGFlow dependent python modules
uv run download_deps.py
pre-commit install
```

View file

@ -315,7 +315,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
```bash
git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
uv sync --python 3.10 # install RAGFlow dependent python modules
uv sync --python 3.11 # install RAGFlow dependent python modules
uv run download_deps.py
pre-commit install
```

View file

@ -56,7 +56,7 @@ async def async_request(
method: str,
url: str,
*,
timeout: float | httpx.Timeout | None = None,
request_timeout: float | httpx.Timeout | None = None,
follow_redirects: bool | None = None,
max_redirects: Optional[int] = None,
headers: Optional[Dict[str, str]] = None,
@ -67,7 +67,7 @@ async def async_request(
**kwargs: Any,
) -> httpx.Response:
"""Lightweight async HTTP wrapper using httpx.AsyncClient with safe defaults."""
timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
timeout = request_timeout if request_timeout is not None else DEFAULT_TIMEOUT
follow_redirects = (
DEFAULT_FOLLOW_REDIRECTS if follow_redirects is None else follow_redirects
)

View file

@ -150,7 +150,7 @@ class MCPToolCallSession(ToolCallSession):
except asyncio.CancelledError:
break
async def _call_mcp_server(self, task_type: MCPTaskType, timeout: float | int = 8, **kwargs) -> Any:
async def _call_mcp_server(self, task_type: MCPTaskType, request_timeout: float | int = 8, **kwargs) -> Any:
if self._close:
raise ValueError("Session is closed")
@ -158,18 +158,18 @@ class MCPToolCallSession(ToolCallSession):
await self._queue.put((task_type, kwargs, results))
try:
result: CallToolResult | Exception = await asyncio.wait_for(results.get(), timeout=timeout)
result: CallToolResult | Exception = await asyncio.wait_for(results.get(), timeout=request_timeout)
if isinstance(result, Exception):
raise result
return result
except asyncio.TimeoutError:
raise asyncio.TimeoutError(f"MCP task '{task_type}' timeout after {timeout}s")
raise asyncio.TimeoutError(f"MCP task '{task_type}' timeout after {request_timeout}s")
except Exception:
raise
async def _call_mcp_tool(self, name: str, arguments: dict[str, Any], timeout: float | int = 10) -> str:
async def _call_mcp_tool(self, name: str, arguments: dict[str, Any], request_timeout: float | int = 10) -> str:
result: CallToolResult = await self._call_mcp_server("tool_call", name=name, arguments=arguments,
timeout=timeout)
request_timeout=request_timeout)
if result.isError:
return f"MCP server error: {result.content}"
@ -180,9 +180,9 @@ class MCPToolCallSession(ToolCallSession):
else:
return f"Unsupported content type {type(result.content)}"
async def _get_tools_from_mcp_server(self, timeout: float | int = 8) -> list[Tool]:
async def _get_tools_from_mcp_server(self, request_timeout: float | int = 8) -> list[Tool]:
try:
result: ListToolsResult = await self._call_mcp_server("list_tools", timeout=timeout)
result: ListToolsResult = await self._call_mcp_server("list_tools", request_timeout=request_timeout)
return result.tools
except Exception:
raise
@ -191,7 +191,7 @@ class MCPToolCallSession(ToolCallSession):
if self._close:
raise ValueError("Session is closed")
future = asyncio.run_coroutine_threadsafe(self._get_tools_from_mcp_server(timeout=timeout), self._event_loop)
future = asyncio.run_coroutine_threadsafe(self._get_tools_from_mcp_server(request_timeout=timeout), self._event_loop)
try:
return future.result(timeout=timeout)
except FuturesTimeoutError:

View file

@ -72,7 +72,7 @@ services:
infinity:
profiles:
- infinity
image: infiniflow/infinity:v0.6.10
image: infiniflow/infinity:v0.6.11
volumes:
- infinity_data:/var/infinity
- ./infinity_conf.toml:/infinity_conf.toml

View file

@ -1,5 +1,5 @@
[general]
version = "0.6.10"
version = "0.6.11"
time_zone = "utc-8"
[network]

View file

@ -41,13 +41,19 @@ cd ragflow/
pipx install uv
```
2. Install Python dependencies:
2. Install the RAGFlow service's Python dependencies:
```bash
uv sync --python 3.10 # install RAGFlow dependent python modules
uv sync --python 3.11 --frozen
```
*A virtual environment named `.venv` is created, and all Python dependencies are installed into the new environment.*
If you need to run tests against the RAGFlow service, install the test dependencies:
```bash
uv sync --python 3.11 --group test --frozen && uv pip install sdk/python --group test
```
### Launch third-party services
The following command launches the 'base' services (MinIO, Elasticsearch, Redis, and MySQL) using Docker Compose:

View file

@ -176,7 +176,7 @@ This section is contributed by our community contributor [yiminghub2024](https:/
iii. Copy [docker/entrypoint.sh](https://github.com/infiniflow/ragflow/blob/main/docker/entrypoint.sh) locally.
iv. Install the required dependencies using `uv`:
- Run `uv add mcp` or
- Copy [pyproject.toml](https://github.com/infiniflow/ragflow/blob/main/pyproject.toml) locally and run `uv sync --python 3.10`.
- Copy [pyproject.toml](https://github.com/infiniflow/ragflow/blob/main/pyproject.toml) locally and run `uv sync --python 3.11`.
2. Edit **docker-compose.yml** to enable MCP (disabled by default).
3. Launch the MCP server:

View file

@ -4522,3 +4522,687 @@ Explanation:
- Each service is reported as "ok" or "nok".
- The top-level `status` reflects overall health.
- If any service is "nok", detailed error info appears in `_meta`.
---
## FILE MANAGEMENT
---
### Upload file
**POST** `/api/v1/file/upload`
Uploads one or more files to the system.
#### Request
- Method: POST
- URL: `/api/v1/file/upload`
- Headers:
- `'Content-Type: multipart/form-data'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Form:
- `'file=@{FILE_PATH}'`
- `'parent_id'`: `string` (optional)
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/file/upload \
--header 'Content-Type: multipart/form-data' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--form 'file=@./test1.txt' \
--form 'file=@./test2.pdf' \
--form 'parent_id={folder_id}'
```
##### Request parameters
- `'file'`: (*Form parameter*), `file`, *Required*
The file(s) to upload. Multiple files can be uploaded in a single request.
- `'parent_id'`: (*Form parameter*), `string`
The parent folder ID where the file will be uploaded. If not specified, files will be uploaded to the root folder.
#### Response
Success:
```json
{
"code": 0,
"data": [
{
"id": "b330ec2e91ec11efbc510242ac120004",
"name": "test1.txt",
"size": 17966,
"type": "doc",
"parent_id": "527fa74891e811ef9c650242ac120006",
"location": "test1.txt",
"create_time": 1729763127646
}
]
}
```
Failure:
```json
{
"code": 400,
"message": "No file part!"
}
```
---
### Create file or folder
**POST** `/api/v1/file/create`
Creates a new file or folder in the system.
#### Request
- Method: POST
- URL: `/api/v1/file/create`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"name"`: `string`
- `"parent_id"`: `string` (optional)
- `"type"`: `string`
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/file/create \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"name": "New Folder",
"type": "FOLDER",
"parent_id": "{folder_id}"
}'
```
##### Request parameters
- `"name"`: (*Body parameter*), `string`, *Required*
The name of the file or folder to create.
- `"parent_id"`: (*Body parameter*), `string`
The parent folder ID. If not specified, the file/folder will be created in the root folder.
- `"type"`: (*Body parameter*), `string`
The type of the file to create. Available options:
- `"FOLDER"`: Create a folder
- `"VIRTUAL"`: Create a virtual file
#### Response
Success:
```json
{
"code": 0,
"data": {
"id": "b330ec2e91ec11efbc510242ac120004",
"name": "New Folder",
"type": "FOLDER",
"parent_id": "527fa74891e811ef9c650242ac120006",
"size": 0,
"create_time": 1729763127646
}
}
```
Failure:
```json
{
"code": 409,
"message": "Duplicated folder name in the same folder."
}
```
---
### List files
**GET** `/api/v1/file/list?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}`
Lists files and folders under a specific folder.
#### Request
- Method: GET
- URL: `/api/v1/file/list?parent_id={parent_id}&keywords={keywords}&page={page}&page_size={page_size}&orderby={orderby}&desc={desc}`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url 'http://{address}/api/v1/file/list?parent_id={folder_id}&page=1&page_size=15' \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `parent_id`: (*Filter parameter*), `string`
The folder ID to list files from. If not specified, the root folder is used by default.
- `keywords`: (*Filter parameter*), `string`
Search keyword to filter files by name.
- `page`: (*Filter parameter*), `integer`
Specifies the page on which the files will be displayed. Defaults to `1`.
- `page_size`: (*Filter parameter*), `integer`
The number of files on each page. Defaults to `15`.
- `orderby`: (*Filter parameter*), `string`
The field by which files should be sorted. Available options:
- `create_time` (default)
- `desc`: (*Filter parameter*), `boolean`
Indicates whether the retrieved files should be sorted in descending order. Defaults to `true`.
#### Response
Success:
```json
{
"code": 0,
"data": {
"total": 10,
"files": [
{
"id": "b330ec2e91ec11efbc510242ac120004",
"name": "test1.txt",
"type": "doc",
"size": 17966,
"parent_id": "527fa74891e811ef9c650242ac120006",
"create_time": 1729763127646
}
],
"parent_folder": {
"id": "527fa74891e811ef9c650242ac120006",
"name": "Parent Folder"
}
}
}
```
Failure:
```json
{
"code": 404,
"message": "Folder not found!"
}
```
---
### Get root folder
**GET** `/api/v1/file/root_folder`
Retrieves the user's root folder information.
#### Request
- Method: GET
- URL: `/api/v1/file/root_folder`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/file/root_folder \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
No parameters required.
#### Response
Success:
```json
{
"code": 0,
"data": {
"root_folder": {
"id": "527fa74891e811ef9c650242ac120006",
"name": "root",
"type": "FOLDER"
}
}
}
```
---
### Get parent folder
**GET** `/api/v1/file/parent_folder?file_id={file_id}`
Retrieves the immediate parent folder information of a specified file.
#### Request
- Method: GET
- URL: `/api/v1/file/parent_folder?file_id={file_id}`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url 'http://{address}/api/v1/file/parent_folder?file_id={file_id}' \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `file_id`: (*Filter parameter*), `string`, *Required*
The ID of the file whose immediate parent folder is to be retrieved.
#### Response
Success:
```json
{
"code": 0,
"data": {
"parent_folder": {
"id": "527fa74891e811ef9c650242ac120006",
"name": "Parent Folder"
}
}
}
```
Failure:
```json
{
"code": 404,
"message": "Folder not found!"
}
```
---
### Get all parent folders
**GET** `/api/v1/file/all_parent_folder?file_id={file_id}`
Retrieves all parent folders of a specified file in the folder hierarchy.
#### Request
- Method: GET
- URL: `/api/v1/file/all_parent_folder?file_id={file_id}`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url 'http://{address}/api/v1/file/all_parent_folder?file_id={file_id}' \
--header 'Authorization: Bearer <YOUR_API_KEY>'
```
##### Request parameters
- `file_id`: (*Filter parameter*), `string`, *Required*
The ID of the file whose parent folders are to be retrieved.
#### Response
Success:
```json
{
"code": 0,
"data": {
"parent_folders": [
{
"id": "527fa74891e811ef9c650242ac120006",
"name": "Parent Folder 1"
},
{
"id": "627fa74891e811ef9c650242ac120007",
"name": "Parent Folder 2"
}
]
}
}
```
Failure:
```json
{
"code": 404,
"message": "Folder not found!"
}
```
---
### Delete files
**POST** `/api/v1/file/rm`
Deletes one or more files or folders.
#### Request
- Method: POST
- URL: `/api/v1/file/rm`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"file_ids"`: `list[string]`
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/file/rm \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"file_ids": ["file_id_1", "file_id_2"]
}'
```
##### Request parameters
- `"file_ids"`: (*Body parameter*), `list[string]`, *Required*
The IDs of the files or folders to delete.
#### Response
Success:
```json
{
"code": 0,
"data": true
}
```
Failure:
```json
{
"code": 404,
"message": "File or Folder not found!"
}
```
---
### Rename file
**POST** `/api/v1/file/rename`
Renames a file or folder.
#### Request
- Method: POST
- URL: `/api/v1/file/rename`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"file_id"`: `string`
- `"name"`: `string`
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/file/rename \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"file_id": "{file_id}",
"name": "new_name.txt"
}'
```
##### Request parameters
- `"file_id"`: (*Body parameter*), `string`, *Required*
The ID of the file or folder to rename.
- `"name"`: (*Body parameter*), `string`, *Required*
The new name for the file or folder. Note: Changing file extensions is *not* supported.
#### Response
Success:
```json
{
"code": 0,
"data": true
}
```
Failure:
```json
{
"code": 400,
"message": "The extension of file can't be changed"
}
```
or
```json
{
"code": 409,
"message": "Duplicated file name in the same folder."
}
```
---
### Download file
**GET** `/api/v1/file/get/{file_id}`
Downloads a file from the system.
#### Request
- Method: GET
- URL: `/api/v1/file/get/{file_id}`
- Headers:
- `'Authorization: Bearer <YOUR_API_KEY>'`
##### Request example
```bash
curl --request GET \
--url http://{address}/api/v1/file/get/{file_id} \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--output ./downloaded_file.txt
```
##### Request parameters
- `file_id`: (*Path parameter*), `string`, *Required*
The ID of the file to download.
#### Response
Success:
Returns the file content as a binary stream with appropriate Content-Type headers.
Failure:
```json
{
"code": 404,
"message": "Document not found!"
}
```
---
### Move files
**POST** `/api/v1/file/mv`
Moves one or more files or folders to a specified folder.
#### Request
- Method: POST
- URL: `/api/v1/file/mv`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"src_file_ids"`: `list[string]`
- `"dest_file_id"`: `string`
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/file/mv \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"src_file_ids": ["file_id_1", "file_id_2"],
"dest_file_id": "{destination_folder_id}"
}'
```
##### Request parameters
- `"src_file_ids"`: (*Body parameter*), `list[string]`, *Required*
The IDs of the files or folders to move.
- `"dest_file_id"`: (*Body parameter*), `string`, *Required*
The ID of the destination folder.
#### Response
Success:
```json
{
"code": 0,
"data": true
}
```
Failure:
```json
{
"code": 404,
"message": "File or Folder not found!"
}
```
or
```json
{
"code": 404,
"message": "Parent Folder not found!"
}
```
---
### Convert files to documents and link them to datasets
**POST** `/api/v1/file/convert`
Converts files to documents and links them to specified datasets.
#### Request
- Method: POST
- URL: `/api/v1/file/convert`
- Headers:
- `'Content-Type: application/json'`
- `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
- `"file_ids"`: `list[string]`
- `"kb_ids"`: `list[string]`
##### Request example
```bash
curl --request POST \
--url http://{address}/api/v1/file/convert \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer <YOUR_API_KEY>' \
--data '{
"file_ids": ["file_id_1", "file_id_2"],
"kb_ids": ["dataset_id_1", "dataset_id_2"]
}'
```
##### Request parameters
- `"file_ids"`: (*Body parameter*), `list[string]`, *Required*
The IDs of the files to convert. If a folder ID is provided, all files within that folder will be converted.
- `"kb_ids"`: (*Body parameter*), `list[string]`, *Required*
The IDs of the target datasets.
#### Response
Success:
```json
{
"code": 0,
"data": [
{
"id": "file2doc_id_1",
"file_id": "file_id_1",
"document_id": "document_id_1"
}
]
}
```
Failure:
```json
{
"code": 404,
"message": "File not found!"
}
```
or
```json
{
"code": 404,
"message": "Can't find this knowledgebase!"
}
```

View file

@ -28,6 +28,7 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
"https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
["https://registry.npmmirror.com/-/binary/chrome-for-testing/121.0.6167.85/linux64/chrome-linux64.zip", "chrome-linux64-121-0-6167-85"],
["https://registry.npmmirror.com/-/binary/chrome-for-testing/121.0.6167.85/linux64/chromedriver-linux64.zip", "chromedriver-linux64-121-0-6167-85"],
"https://github.com/astral-sh/uv/releases/download/0.9.16/uv-x86_64-unknown-linux-gnu.tar.gz",
]
else:
return [
@ -38,6 +39,7 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
"https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
["https://storage.googleapis.com/chrome-for-testing-public/121.0.6167.85/linux64/chrome-linux64.zip", "chrome-linux64-121-0-6167-85"],
["https://storage.googleapis.com/chrome-for-testing-public/121.0.6167.85/linux64/chromedriver-linux64.zip", "chromedriver-linux64-121-0-6167-85"],
"https://github.com/astral-sh/uv/releases/download/0.9.16/uv-x86_64-unknown-linux-gnu.tar.gz",
]

View file

@ -96,7 +96,7 @@ ragflow:
infinity:
image:
repository: infiniflow/infinity
tag: v0.6.10
tag: v0.6.11
pullPolicy: IfNotPresent
pullSecrets: []
storage:

View file

@ -5,7 +5,7 @@ description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-A
authors = [{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }]
license-files = ["LICENSE"]
readme = "README.md"
requires-python = ">=3.10,<3.13"
requires-python = ">=3.11,<3.15"
dependencies = [
"datrie>=0.8.3,<0.9.0",
"akshare>=1.15.78,<2.0.0",
@ -49,7 +49,7 @@ dependencies = [
"html-text==0.6.2",
"httpx[socks]>=0.28.1,<0.29.0",
"huggingface-hub>=0.25.0,<0.26.0",
"infinity-sdk==0.6.10",
"infinity-sdk==0.6.11",
"infinity-emb>=0.0.66,<0.0.67",
"itsdangerous==2.1.2",
"json-repair==0.35.0",
@ -92,7 +92,7 @@ dependencies = [
"ranx==0.3.20",
"readability-lxml==0.8.1",
"valkey==6.0.2",
"requests==2.32.2",
"requests>=2.32.3,<3.0.0",
"replicate==0.31.0",
"roman-numbers==1.0.2",
"ruamel-base==1.0.0",
@ -101,7 +101,7 @@ dependencies = [
"scikit-learn==1.5.0",
"selenium==4.22.0",
"selenium-wire==5.1.0",
"setuptools>=75.2.0,<76.0.0",
"setuptools>=78.1.1,<81.0.0",
"shapely==2.0.5",
"six==1.16.0",
"slack-sdk==3.37.0",

View file

@ -187,6 +187,9 @@ class Base(ABC):
ans = delta_ans
total_tokens += tol
yield ans
yield total_tokens
return
except Exception as e:
e = await self._exceptions_async(e, attempt)
if e:
@ -194,8 +197,6 @@ class Base(ABC):
yield total_tokens
return
yield total_tokens
def _length_stop(self, ans):
if is_chinese([ans]):
return ans + LENGTH_NOTIFICATION_CN

View file

@ -443,6 +443,9 @@ class InfinityConnection(DocStoreConnection):
del matchExpr.extra_options["similarity"]
logger.debug(f"INFINITY search MatchDenseExpr: {json.dumps(matchExpr.__dict__)}")
elif isinstance(matchExpr, FusionExpr):
if matchExpr.method == "weighted_sum":
# The default is "minmax" which gives a zero score for the last doc.
matchExpr.fusion_params["normalize"] = "atan"
logger.debug(f"INFINITY search FusionExpr: {json.dumps(matchExpr.__dict__)}")
order_by_expr_list = list()

2808
uv.lock generated

File diff suppressed because it is too large Load diff