Merge branch 'main' into fix_-asyn
This commit is contained in:
commit
3f853ee722
22 changed files with 482 additions and 238 deletions
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
|
|
@ -31,7 +31,7 @@ jobs:
|
||||||
name: ragflow_tests
|
name: ragflow_tests
|
||||||
# https://docs.github.com/en/actions/using-jobs/using-conditions-to-control-job-execution
|
# https://docs.github.com/en/actions/using-jobs/using-conditions-to-control-job-execution
|
||||||
# https://github.com/orgs/community/discussions/26261
|
# https://github.com/orgs/community/discussions/26261
|
||||||
if: ${{ github.event_name != 'pull_request_target' || (contains(github.event.pull_request.labels.*.name, 'ci') && github.event.pull_request.mergeable == true) }}
|
if: ${{ github.event_name != 'pull_request_target' || (contains(github.event.pull_request.labels.*.name, 'ci') && github.event.pull_request.mergeable != false) }}
|
||||||
runs-on: [ "self-hosted", "ragflow-test" ]
|
runs-on: [ "self-hosted", "ragflow-test" ]
|
||||||
steps:
|
steps:
|
||||||
# https://github.com/hmarr/debug-action
|
# https://github.com/hmarr/debug-action
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,6 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
import base64
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
@ -27,6 +26,7 @@ from typing import Any, Union, Tuple
|
||||||
|
|
||||||
from agent.component import component_class
|
from agent.component import component_class
|
||||||
from agent.component.base import ComponentBase
|
from agent.component.base import ComponentBase
|
||||||
|
from api.db.services.file_service import FileService
|
||||||
from api.db.services.task_service import has_canceled
|
from api.db.services.task_service import has_canceled
|
||||||
from common.misc_utils import get_uuid, hash_str2int
|
from common.misc_utils import get_uuid, hash_str2int
|
||||||
from common.exceptions import TaskCanceledException
|
from common.exceptions import TaskCanceledException
|
||||||
|
|
@ -374,7 +374,7 @@ class Canvas(Graph):
|
||||||
for k in kwargs.keys():
|
for k in kwargs.keys():
|
||||||
if k in ["query", "user_id", "files"] and kwargs[k]:
|
if k in ["query", "user_id", "files"] and kwargs[k]:
|
||||||
if k == "files":
|
if k == "files":
|
||||||
self.globals[f"sys.{k}"] = await self.get_files(kwargs[k])
|
self.globals[f"sys.{k}"] = FileService.get_files(kwargs[k])
|
||||||
else:
|
else:
|
||||||
self.globals[f"sys.{k}"] = kwargs[k]
|
self.globals[f"sys.{k}"] = kwargs[k]
|
||||||
if not self.globals["sys.conversation_turns"] :
|
if not self.globals["sys.conversation_turns"] :
|
||||||
|
|
@ -643,24 +643,6 @@ class Canvas(Graph):
|
||||||
def get_component_input_elements(self, cpnnm):
|
def get_component_input_elements(self, cpnnm):
|
||||||
return self.components[cpnnm]["obj"].get_input_elements()
|
return self.components[cpnnm]["obj"].get_input_elements()
|
||||||
|
|
||||||
async def get_files(self, files: Union[None, list[dict]]) -> list[str]:
|
|
||||||
from api.db.services.file_service import FileService
|
|
||||||
if not files:
|
|
||||||
return []
|
|
||||||
def image_to_base64(file):
|
|
||||||
return "data:{};base64,{}".format(file["mime_type"],
|
|
||||||
base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8"))
|
|
||||||
|
|
||||||
loop = asyncio.get_running_loop()
|
|
||||||
tasks = []
|
|
||||||
for file in files:
|
|
||||||
if file["mime_type"].find("image") >=0:
|
|
||||||
tasks.append(loop.run_in_executor(None, image_to_base64, file))
|
|
||||||
continue
|
|
||||||
tasks.append(loop.run_in_executor(None, partial(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"])))
|
|
||||||
|
|
||||||
return await asyncio.gather(*tasks)
|
|
||||||
|
|
||||||
def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any, elapsed_time=None):
|
def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any, elapsed_time=None):
|
||||||
agent_ids = agent_id.split("-->")
|
agent_ids = agent_id.split("-->")
|
||||||
agent_name = self.get_component_name(agent_ids[0])
|
agent_name = self.get_component_name(agent_ids[0])
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
from agent.component.fillup import UserFillUpParam, UserFillUp
|
from agent.component.fillup import UserFillUpParam, UserFillUp
|
||||||
|
from api.db.services.file_service import FileService
|
||||||
|
|
||||||
|
|
||||||
class BeginParam(UserFillUpParam):
|
class BeginParam(UserFillUpParam):
|
||||||
|
|
@ -48,7 +49,7 @@ class Begin(UserFillUp):
|
||||||
if v.get("optional") and v.get("value", None) is None:
|
if v.get("optional") and v.get("value", None) is None:
|
||||||
v = None
|
v = None
|
||||||
else:
|
else:
|
||||||
v = self._canvas.get_files([v["value"]])
|
v = FileService.get_files([v["value"]])
|
||||||
else:
|
else:
|
||||||
v = v.get("value")
|
v = v.get("value")
|
||||||
self.set_output(k, v)
|
self.set_output(k, v)
|
||||||
|
|
|
||||||
|
|
@ -15,13 +15,10 @@
|
||||||
#
|
#
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
import trio
|
|
||||||
from quart import request, Response, make_response
|
from quart import request, Response, make_response
|
||||||
from agent.component import LLM
|
from agent.component import LLM
|
||||||
from api.db import CanvasCategory, FileType
|
from api.db import CanvasCategory
|
||||||
from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService, API4ConversationService
|
from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService, API4ConversationService
|
||||||
from api.db.services.document_service import DocumentService
|
from api.db.services.document_service import DocumentService
|
||||||
from api.db.services.file_service import FileService
|
from api.db.services.file_service import FileService
|
||||||
|
|
@ -38,7 +35,6 @@ from peewee import MySQLDatabase, PostgresqlDatabase
|
||||||
from api.db.db_models import APIToken, Task
|
from api.db.db_models import APIToken, Task
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from api.utils.file_utils import filename_type, read_potential_broken_pdf
|
|
||||||
from rag.flow.pipeline import Pipeline
|
from rag.flow.pipeline import Pipeline
|
||||||
from rag.nlp import search
|
from rag.nlp import search
|
||||||
from rag.utils.redis_conn import REDIS_CONN
|
from rag.utils.redis_conn import REDIS_CONN
|
||||||
|
|
@ -250,71 +246,10 @@ async def upload(canvas_id):
|
||||||
return get_data_error_result(message="canvas not found.")
|
return get_data_error_result(message="canvas not found.")
|
||||||
|
|
||||||
user_id = cvs["user_id"]
|
user_id = cvs["user_id"]
|
||||||
def structured(filename, filetype, blob, content_type):
|
|
||||||
nonlocal user_id
|
|
||||||
if filetype == FileType.PDF.value:
|
|
||||||
blob = read_potential_broken_pdf(blob)
|
|
||||||
|
|
||||||
location = get_uuid()
|
|
||||||
FileService.put_blob(user_id, location, blob)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"id": location,
|
|
||||||
"name": filename,
|
|
||||||
"size": sys.getsizeof(blob),
|
|
||||||
"extension": filename.split(".")[-1].lower(),
|
|
||||||
"mime_type": content_type,
|
|
||||||
"created_by": user_id,
|
|
||||||
"created_at": time.time(),
|
|
||||||
"preview_url": None
|
|
||||||
}
|
|
||||||
|
|
||||||
if request.args.get("url"):
|
|
||||||
from crawl4ai import (
|
|
||||||
AsyncWebCrawler,
|
|
||||||
BrowserConfig,
|
|
||||||
CrawlerRunConfig,
|
|
||||||
DefaultMarkdownGenerator,
|
|
||||||
PruningContentFilter,
|
|
||||||
CrawlResult
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
url = request.args.get("url")
|
|
||||||
filename = re.sub(r"\?.*", "", url.split("/")[-1])
|
|
||||||
async def adownload():
|
|
||||||
browser_config = BrowserConfig(
|
|
||||||
headless=True,
|
|
||||||
verbose=False,
|
|
||||||
)
|
|
||||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
|
||||||
crawler_config = CrawlerRunConfig(
|
|
||||||
markdown_generator=DefaultMarkdownGenerator(
|
|
||||||
content_filter=PruningContentFilter()
|
|
||||||
),
|
|
||||||
pdf=True,
|
|
||||||
screenshot=False
|
|
||||||
)
|
|
||||||
result: CrawlResult = await crawler.arun(
|
|
||||||
url=url,
|
|
||||||
config=crawler_config
|
|
||||||
)
|
|
||||||
return result
|
|
||||||
page = trio.run(adownload())
|
|
||||||
if page.pdf:
|
|
||||||
if filename.split(".")[-1].lower() != "pdf":
|
|
||||||
filename += ".pdf"
|
|
||||||
return get_json_result(data=structured(filename, "pdf", page.pdf, page.response_headers["content-type"]))
|
|
||||||
|
|
||||||
return get_json_result(data=structured(filename, "html", str(page.markdown).encode("utf-8"), page.response_headers["content-type"], user_id))
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
return server_error_response(e)
|
|
||||||
|
|
||||||
files = await request.files
|
files = await request.files
|
||||||
file = files['file']
|
file = files['file'] if files and files.get("file") else None
|
||||||
try:
|
try:
|
||||||
DocumentService.check_doc_health(user_id, file.filename)
|
return get_json_result(data=FileService.upload_info(user_id, file, request.args.get("url")))
|
||||||
return get_json_result(data=structured(file.filename, filename_type(file.filename), file.read(), file.content_type))
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return server_error_response(e)
|
return server_error_response(e)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -607,7 +607,7 @@ async def get_image(image_id):
|
||||||
@login_required
|
@login_required
|
||||||
@validate_request("conversation_id")
|
@validate_request("conversation_id")
|
||||||
async def upload_and_parse():
|
async def upload_and_parse():
|
||||||
files = await request.file
|
files = await request.files
|
||||||
if "file" not in files:
|
if "file" not in files:
|
||||||
return get_json_result(data=False, message="No file part!", code=RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message="No file part!", code=RetCode.ARGUMENT_ERROR)
|
||||||
|
|
||||||
|
|
@ -705,3 +705,12 @@ async def set_meta():
|
||||||
return get_json_result(data=True)
|
return get_json_result(data=True)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return server_error_response(e)
|
return server_error_response(e)
|
||||||
|
|
||||||
|
@manager.route("/upload_info", methods=["POST"]) # noqa: F821
|
||||||
|
async def upload_info():
|
||||||
|
files = await request.files
|
||||||
|
file = files['file'] if files and files.get("file") else None
|
||||||
|
try:
|
||||||
|
return get_json_result(data=FileService.upload_info(current_user.id, file, request.args.get("url")))
|
||||||
|
except Exception as e:
|
||||||
|
return server_error_response(e)
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ import trio
|
||||||
from langfuse import Langfuse
|
from langfuse import Langfuse
|
||||||
from peewee import fn
|
from peewee import fn
|
||||||
from agentic_reasoning import DeepResearcher
|
from agentic_reasoning import DeepResearcher
|
||||||
|
from api.db.services.file_service import FileService
|
||||||
from common.constants import LLMType, ParserType, StatusEnum
|
from common.constants import LLMType, ParserType, StatusEnum
|
||||||
from api.db.db_models import DB, Dialog
|
from api.db.db_models import DB, Dialog
|
||||||
from api.db.services.common_service import CommonService
|
from api.db.services.common_service import CommonService
|
||||||
|
|
@ -380,8 +381,11 @@ def chat(dialog, messages, stream=True, **kwargs):
|
||||||
retriever = settings.retriever
|
retriever = settings.retriever
|
||||||
questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
|
questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
|
||||||
attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else []
|
attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else []
|
||||||
|
attachments_= ""
|
||||||
if "doc_ids" in messages[-1]:
|
if "doc_ids" in messages[-1]:
|
||||||
attachments = messages[-1]["doc_ids"]
|
attachments = messages[-1]["doc_ids"]
|
||||||
|
if "files" in messages[-1]:
|
||||||
|
attachments_ = "\n\n".join(FileService.get_files(messages[-1]["files"]))
|
||||||
|
|
||||||
prompt_config = dialog.prompt_config
|
prompt_config = dialog.prompt_config
|
||||||
field_map = KnowledgebaseService.get_field_map(dialog.kb_ids)
|
field_map = KnowledgebaseService.get_field_map(dialog.kb_ids)
|
||||||
|
|
@ -451,7 +455,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
for think in reasoner.thinking(kbinfos, " ".join(questions)):
|
for think in reasoner.thinking(kbinfos, attachments_ + " ".join(questions)):
|
||||||
if isinstance(think, str):
|
if isinstance(think, str):
|
||||||
thought = think
|
thought = think
|
||||||
knowledges = [t for t in think.split("\n") if t]
|
knowledges = [t for t in think.split("\n") if t]
|
||||||
|
|
@ -503,7 +507,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
||||||
kwargs["knowledge"] = "\n------\n" + "\n\n------\n\n".join(knowledges)
|
kwargs["knowledge"] = "\n------\n" + "\n\n------\n\n".join(knowledges)
|
||||||
gen_conf = dialog.llm_setting
|
gen_conf = dialog.llm_setting
|
||||||
|
|
||||||
msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}]
|
msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)+attachments_}]
|
||||||
prompt4citation = ""
|
prompt4citation = ""
|
||||||
if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
|
if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
|
||||||
prompt4citation = citation_prompt()
|
prompt4citation = citation_prompt()
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,15 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
from peewee import fn
|
from peewee import fn
|
||||||
|
|
||||||
|
|
@ -520,7 +525,7 @@ class FileService(CommonService):
|
||||||
if img_base64 and file_type == FileType.VISUAL.value:
|
if img_base64 and file_type == FileType.VISUAL.value:
|
||||||
return GptV4.image2base64(blob)
|
return GptV4.image2base64(blob)
|
||||||
cks = FACTORY.get(FileService.get_parser(filename_type(filename), filename, ""), naive).chunk(filename, blob, **kwargs)
|
cks = FACTORY.get(FileService.get_parser(filename_type(filename), filename, ""), naive).chunk(filename, blob, **kwargs)
|
||||||
return "\n".join([ck["content_with_weight"] for ck in cks])
|
return f"\n -----------------\nFile: {filename}\nContent as following: \n" + "\n".join([ck["content_with_weight"] for ck in cks])
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_parser(doc_type, filename, default):
|
def get_parser(doc_type, filename, default):
|
||||||
|
|
@ -588,3 +593,80 @@ class FileService(CommonService):
|
||||||
errors += str(e)
|
errors += str(e)
|
||||||
|
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def upload_info(user_id, file, url: str|None=None):
|
||||||
|
def structured(filename, filetype, blob, content_type):
|
||||||
|
nonlocal user_id
|
||||||
|
if filetype == FileType.PDF.value:
|
||||||
|
blob = read_potential_broken_pdf(blob)
|
||||||
|
|
||||||
|
location = get_uuid()
|
||||||
|
FileService.put_blob(user_id, location, blob)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"id": location,
|
||||||
|
"name": filename,
|
||||||
|
"size": sys.getsizeof(blob),
|
||||||
|
"extension": filename.split(".")[-1].lower(),
|
||||||
|
"mime_type": content_type,
|
||||||
|
"created_by": user_id,
|
||||||
|
"created_at": time.time(),
|
||||||
|
"preview_url": None
|
||||||
|
}
|
||||||
|
|
||||||
|
if url:
|
||||||
|
from crawl4ai import (
|
||||||
|
AsyncWebCrawler,
|
||||||
|
BrowserConfig,
|
||||||
|
CrawlerRunConfig,
|
||||||
|
DefaultMarkdownGenerator,
|
||||||
|
PruningContentFilter,
|
||||||
|
CrawlResult
|
||||||
|
)
|
||||||
|
filename = re.sub(r"\?.*", "", url.split("/")[-1])
|
||||||
|
async def adownload():
|
||||||
|
browser_config = BrowserConfig(
|
||||||
|
headless=True,
|
||||||
|
verbose=False,
|
||||||
|
)
|
||||||
|
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||||
|
crawler_config = CrawlerRunConfig(
|
||||||
|
markdown_generator=DefaultMarkdownGenerator(
|
||||||
|
content_filter=PruningContentFilter()
|
||||||
|
),
|
||||||
|
pdf=True,
|
||||||
|
screenshot=False
|
||||||
|
)
|
||||||
|
result: CrawlResult = await crawler.arun(
|
||||||
|
url=url,
|
||||||
|
config=crawler_config
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
page = asyncio.run(adownload())
|
||||||
|
if page.pdf:
|
||||||
|
if filename.split(".")[-1].lower() != "pdf":
|
||||||
|
filename += ".pdf"
|
||||||
|
return structured(filename, "pdf", page.pdf, page.response_headers["content-type"])
|
||||||
|
|
||||||
|
return structured(filename, "html", str(page.markdown).encode("utf-8"), page.response_headers["content-type"], user_id)
|
||||||
|
|
||||||
|
DocumentService.check_doc_health(user_id, file.filename)
|
||||||
|
return structured(file.filename, filename_type(file.filename), file.read(), file.content_type)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_files(self, files: Union[None, list[dict]]) -> list[str]:
|
||||||
|
if not files:
|
||||||
|
return []
|
||||||
|
def image_to_base64(file):
|
||||||
|
return "data:{};base64,{}".format(file["mime_type"],
|
||||||
|
base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8"))
|
||||||
|
exe = ThreadPoolExecutor(max_workers=5)
|
||||||
|
threads = []
|
||||||
|
for file in files:
|
||||||
|
if file["mime_type"].find("image") >=0:
|
||||||
|
threads.append(exe.submit(image_to_base64, file))
|
||||||
|
continue
|
||||||
|
threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"]))
|
||||||
|
return [th.result() for th in threads]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
import string
|
||||||
from typing import Annotated, Any, Literal
|
from typing import Annotated, Any, Literal
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
|
|
@ -25,6 +26,7 @@ from pydantic import (
|
||||||
StringConstraints,
|
StringConstraints,
|
||||||
ValidationError,
|
ValidationError,
|
||||||
field_validator,
|
field_validator,
|
||||||
|
model_validator,
|
||||||
)
|
)
|
||||||
from pydantic_core import PydanticCustomError
|
from pydantic_core import PydanticCustomError
|
||||||
from werkzeug.exceptions import BadRequest, UnsupportedMediaType
|
from werkzeug.exceptions import BadRequest, UnsupportedMediaType
|
||||||
|
|
@ -361,10 +363,9 @@ class CreateDatasetReq(Base):
|
||||||
description: Annotated[str | None, Field(default=None, max_length=65535)]
|
description: Annotated[str | None, Field(default=None, max_length=65535)]
|
||||||
embedding_model: Annotated[str | None, Field(default=None, max_length=255, serialization_alias="embd_id")]
|
embedding_model: Annotated[str | None, Field(default=None, max_length=255, serialization_alias="embd_id")]
|
||||||
permission: Annotated[Literal["me", "team"], Field(default="me", min_length=1, max_length=16)]
|
permission: Annotated[Literal["me", "team"], Field(default="me", min_length=1, max_length=16)]
|
||||||
chunk_method: Annotated[
|
chunk_method: Annotated[str | None, Field(default=None, serialization_alias="parser_id")]
|
||||||
Literal["naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"],
|
parse_type: Annotated[int | None, Field(default=None, ge=0, le=64)]
|
||||||
Field(default="naive", min_length=1, max_length=32, serialization_alias="parser_id"),
|
pipeline_id: Annotated[str | None, Field(default=None, min_length=32, max_length=32, serialization_alias="pipeline_id")]
|
||||||
]
|
|
||||||
parser_config: Annotated[ParserConfig | None, Field(default=None)]
|
parser_config: Annotated[ParserConfig | None, Field(default=None)]
|
||||||
|
|
||||||
@field_validator("avatar", mode="after")
|
@field_validator("avatar", mode="after")
|
||||||
|
|
@ -525,6 +526,93 @@ class CreateDatasetReq(Base):
|
||||||
raise PydanticCustomError("string_too_long", "Parser config exceeds size limit (max 65,535 characters). Current size: {actual}", {"actual": len(json_str)})
|
raise PydanticCustomError("string_too_long", "Parser config exceeds size limit (max 65,535 characters). Current size: {actual}", {"actual": len(json_str)})
|
||||||
return v
|
return v
|
||||||
|
|
||||||
|
@field_validator("pipeline_id", mode="after")
|
||||||
|
@classmethod
|
||||||
|
def validate_pipeline_id(cls, v: str | None) -> str | None:
|
||||||
|
"""Validate pipeline_id as 32-char lowercase hex string if provided.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- None or empty string: treat as None (not set)
|
||||||
|
- Must be exactly length 32
|
||||||
|
- Must contain only hex digits (0-9a-fA-F); normalized to lowercase
|
||||||
|
"""
|
||||||
|
if v is None:
|
||||||
|
return None
|
||||||
|
if v == "":
|
||||||
|
return None
|
||||||
|
if len(v) != 32:
|
||||||
|
raise PydanticCustomError("format_invalid", "pipeline_id must be 32 hex characters")
|
||||||
|
if any(ch not in string.hexdigits for ch in v):
|
||||||
|
raise PydanticCustomError("format_invalid", "pipeline_id must be hexadecimal")
|
||||||
|
return v.lower()
|
||||||
|
|
||||||
|
@model_validator(mode="after")
|
||||||
|
def validate_parser_dependency(self) -> "CreateDatasetReq":
|
||||||
|
"""
|
||||||
|
Mixed conditional validation:
|
||||||
|
- If parser_id is omitted (field not set):
|
||||||
|
* If both parse_type and pipeline_id are omitted → default chunk_method = "naive"
|
||||||
|
* If both parse_type and pipeline_id are provided → allow ingestion pipeline mode
|
||||||
|
- If parser_id is provided (valid enum) → parse_type and pipeline_id must be None (disallow mixed usage)
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
PydanticCustomError with code 'dependency_error' on violation.
|
||||||
|
"""
|
||||||
|
# Omitted chunk_method (not in fields) logic
|
||||||
|
if self.chunk_method is None and "chunk_method" not in self.model_fields_set:
|
||||||
|
# All three absent → default naive
|
||||||
|
if self.parse_type is None and self.pipeline_id is None:
|
||||||
|
object.__setattr__(self, "chunk_method", "naive")
|
||||||
|
return self
|
||||||
|
# parser_id omitted: require BOTH parse_type & pipeline_id present (no partial allowed)
|
||||||
|
if self.parse_type is None or self.pipeline_id is None:
|
||||||
|
missing = []
|
||||||
|
if self.parse_type is None:
|
||||||
|
missing.append("parse_type")
|
||||||
|
if self.pipeline_id is None:
|
||||||
|
missing.append("pipeline_id")
|
||||||
|
raise PydanticCustomError(
|
||||||
|
"dependency_error",
|
||||||
|
"parser_id omitted → required fields missing: {fields}",
|
||||||
|
{"fields": ", ".join(missing)},
|
||||||
|
)
|
||||||
|
# Both provided → allow pipeline mode
|
||||||
|
return self
|
||||||
|
|
||||||
|
# parser_id provided (valid): MUST NOT have parse_type or pipeline_id
|
||||||
|
if isinstance(self.chunk_method, str):
|
||||||
|
if self.parse_type is not None or self.pipeline_id is not None:
|
||||||
|
invalid = []
|
||||||
|
if self.parse_type is not None:
|
||||||
|
invalid.append("parse_type")
|
||||||
|
if self.pipeline_id is not None:
|
||||||
|
invalid.append("pipeline_id")
|
||||||
|
raise PydanticCustomError(
|
||||||
|
"dependency_error",
|
||||||
|
"parser_id provided → disallowed fields present: {fields}",
|
||||||
|
{"fields": ", ".join(invalid)},
|
||||||
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
|
@field_validator("chunk_method", mode="wrap")
|
||||||
|
@classmethod
|
||||||
|
def validate_chunk_method(cls, v: Any, handler) -> Any:
|
||||||
|
"""Wrap validation to unify error messages, including type errors (e.g. list)."""
|
||||||
|
allowed = {"naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"}
|
||||||
|
error_msg = "Input should be 'naive', 'book', 'email', 'laws', 'manual', 'one', 'paper', 'picture', 'presentation', 'qa', 'table' or 'tag'"
|
||||||
|
# Omitted field: handler won't be invoked (wrap still gets value); None treated as explicit invalid
|
||||||
|
if v is None:
|
||||||
|
raise PydanticCustomError("literal_error", error_msg)
|
||||||
|
try:
|
||||||
|
# Run inner validation (type checking)
|
||||||
|
result = handler(v)
|
||||||
|
except Exception:
|
||||||
|
raise PydanticCustomError("literal_error", error_msg)
|
||||||
|
# After handler, enforce enumeration
|
||||||
|
if not isinstance(result, str) or result == "" or result not in allowed:
|
||||||
|
raise PydanticCustomError("literal_error", error_msg)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
class UpdateDatasetReq(CreateDatasetReq):
|
class UpdateDatasetReq(CreateDatasetReq):
|
||||||
dataset_id: Annotated[str, Field(...)]
|
dataset_id: Annotated[str, Field(...)]
|
||||||
|
|
|
||||||
|
|
@ -419,7 +419,15 @@ Creates a dataset.
|
||||||
- `"embedding_model"`: `string`
|
- `"embedding_model"`: `string`
|
||||||
- `"permission"`: `string`
|
- `"permission"`: `string`
|
||||||
- `"chunk_method"`: `string`
|
- `"chunk_method"`: `string`
|
||||||
- `"parser_config"`: `object`
|
- "parser_config": `object`
|
||||||
|
- "parse_type": `int`
|
||||||
|
- "pipeline_id": `string`
|
||||||
|
|
||||||
|
Note: Choose exactly one ingestion mode when creating a dataset.
|
||||||
|
- Chunking method: provide `"chunk_method"` (optionally with `"parser_config"`).
|
||||||
|
- Ingestion pipeline: provide both `"parse_type"` and `"pipeline_id"` and do not provide `"chunk_method"`.
|
||||||
|
|
||||||
|
These options are mutually exclusive. If all three of `chunk_method`, `parse_type`, and `pipeline_id` are omitted, the system defaults to `chunk_method = "naive"`.
|
||||||
|
|
||||||
##### Request example
|
##### Request example
|
||||||
|
|
||||||
|
|
@ -433,6 +441,26 @@ curl --request POST \
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
##### Request example (ingestion pipeline)
|
||||||
|
|
||||||
|
Use this form when specifying an ingestion pipeline (do not include `chunk_method`).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --request POST \
|
||||||
|
--url http://{address}/api/v1/datasets \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--header 'Authorization: Bearer <YOUR_API_KEY>' \
|
||||||
|
--data '{
|
||||||
|
"name": "test-sdk",
|
||||||
|
"parse_type": <NUMBER_OF_FORMATS_IN_PARSE>,
|
||||||
|
"pipeline_id": "<PIPELINE_ID_32_HEX>"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- `parse_type` is an integer. Replace `<NUMBER_OF_FORMATS_IN_PARSE>` with your pipeline's parse-type value.
|
||||||
|
- `pipeline_id` must be a 32-character lowercase hexadecimal string.
|
||||||
|
|
||||||
##### Request parameters
|
##### Request parameters
|
||||||
|
|
||||||
- `"name"`: (*Body parameter*), `string`, *Required*
|
- `"name"`: (*Body parameter*), `string`, *Required*
|
||||||
|
|
@ -473,6 +501,7 @@ curl --request POST \
|
||||||
- `"qa"`: Q&A
|
- `"qa"`: Q&A
|
||||||
- `"table"`: Table
|
- `"table"`: Table
|
||||||
- `"tag"`: Tag
|
- `"tag"`: Tag
|
||||||
|
- Mutually exclusive with `parse_type` and `pipeline_id`. If you set `chunk_method`, do not include `parse_type` or `pipeline_id`.
|
||||||
|
|
||||||
- `"parser_config"`: (*Body parameter*), `object`
|
- `"parser_config"`: (*Body parameter*), `object`
|
||||||
The configuration settings for the dataset parser. The attributes in this JSON object vary with the selected `"chunk_method"`:
|
The configuration settings for the dataset parser. The attributes in this JSON object vary with the selected `"chunk_method"`:
|
||||||
|
|
@ -509,6 +538,15 @@ curl --request POST \
|
||||||
- Defaults to: `{"use_raptor": false}`.
|
- Defaults to: `{"use_raptor": false}`.
|
||||||
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
|
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
|
||||||
|
|
||||||
|
- "parse_type": (*Body parameter*), `int`
|
||||||
|
The ingestion pipeline parse type identifier. Required if and only if you are using an ingestion pipeline (together with `"pipeline_id"`). Must not be provided when `"chunk_method"` is set.
|
||||||
|
|
||||||
|
- "pipeline_id": (*Body parameter*), `string`
|
||||||
|
The ingestion pipeline ID. Required if and only if you are using an ingestion pipeline (together with `"parse_type"`).
|
||||||
|
- Must not be provided when `"chunk_method"` is set.
|
||||||
|
|
||||||
|
Note: If none of `chunk_method`, `parse_type`, and `pipeline_id` are provided, the system will default to `chunk_method = "naive"`.
|
||||||
|
|
||||||
#### Response
|
#### Response
|
||||||
|
|
||||||
Success:
|
Success:
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@ from deepdoc.parser.docling_parser import DoclingParser
|
||||||
from deepdoc.parser.tcadp_parser import TCADPParser
|
from deepdoc.parser.tcadp_parser import TCADPParser
|
||||||
from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table, attach_media_context
|
from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table, attach_media_context
|
||||||
|
|
||||||
|
|
||||||
def by_deepdoc(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, pdf_cls = None ,**kwargs):
|
def by_deepdoc(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, pdf_cls = None ,**kwargs):
|
||||||
callback = callback
|
callback = callback
|
||||||
binary = binary
|
binary = binary
|
||||||
|
|
@ -600,8 +601,7 @@ def load_from_xml_v2(baseURI, rels_item_xml):
|
||||||
srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
|
srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
|
||||||
return srels
|
return srels
|
||||||
|
|
||||||
def chunk(filename, binary=None, from_page=0, to_page=100000,
|
def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, **kwargs):
|
||||||
lang="Chinese", callback=None, **kwargs):
|
|
||||||
"""
|
"""
|
||||||
Supported file formats are docx, pdf, excel, txt.
|
Supported file formats are docx, pdf, excel, txt.
|
||||||
This method apply the naive ways to chunk files.
|
This method apply the naive ways to chunk files.
|
||||||
|
|
@ -611,14 +611,18 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
urls = set()
|
urls = set()
|
||||||
url_res = []
|
url_res = []
|
||||||
|
|
||||||
|
|
||||||
is_english = lang.lower() == "english" # is_english(cks)
|
is_english = lang.lower() == "english" # is_english(cks)
|
||||||
parser_config = kwargs.get(
|
parser_config = kwargs.get(
|
||||||
"parser_config", {
|
"parser_config", {
|
||||||
"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": "DeepDOC", "analyze_hyperlink": True})
|
"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": "DeepDOC", "analyze_hyperlink": True})
|
||||||
|
|
||||||
|
child_deli = re.findall(r"`([^`]+)`", parser_config.get("children_delimiter", ""))
|
||||||
|
child_deli = sorted(set(child_deli), key=lambda x: -len(x))
|
||||||
|
child_deli = "|".join(re.escape(t) for t in child_deli if t)
|
||||||
|
is_markdown = False
|
||||||
table_context_size = max(0, int(parser_config.get("table_context_size", 0) or 0))
|
table_context_size = max(0, int(parser_config.get("table_context_size", 0) or 0))
|
||||||
image_context_size = max(0, int(parser_config.get("image_context_size", 0) or 0))
|
image_context_size = max(0, int(parser_config.get("image_context_size", 0) or 0))
|
||||||
final_sections = False
|
|
||||||
doc = {
|
doc = {
|
||||||
"docnm_kwd": filename,
|
"docnm_kwd": filename,
|
||||||
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename))
|
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename))
|
||||||
|
|
@ -679,12 +683,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
"chunk_token_num", 128)), parser_config.get(
|
"chunk_token_num", 128)), parser_config.get(
|
||||||
"delimiter", "\n!?。;!?"))
|
"delimiter", "\n!?。;!?"))
|
||||||
|
|
||||||
if kwargs.get("section_only", False):
|
res.extend(tokenize_chunks_with_images(chunks, doc, is_english, images, child_delimiters_pattern=child_deli))
|
||||||
chunks.extend(embed_res)
|
|
||||||
chunks.extend(url_res)
|
|
||||||
return chunks
|
|
||||||
|
|
||||||
res.extend(tokenize_chunks_with_images(chunks, doc, is_english, images))
|
|
||||||
logging.info("naive_merge({}): {}".format(filename, timer() - st))
|
logging.info("naive_merge({}): {}".format(filename, timer() - st))
|
||||||
res.extend(embed_res)
|
res.extend(embed_res)
|
||||||
res.extend(url_res)
|
res.extend(url_res)
|
||||||
|
|
@ -780,7 +779,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
return_section_images=True,
|
return_section_images=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
final_sections = True
|
is_markdown = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||||
|
|
@ -857,7 +856,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
"file type not supported yet(pdf, xlsx, doc, docx, txt supported)")
|
"file type not supported yet(pdf, xlsx, doc, docx, txt supported)")
|
||||||
|
|
||||||
st = timer()
|
st = timer()
|
||||||
if final_sections:
|
if is_markdown:
|
||||||
merged_chunks = []
|
merged_chunks = []
|
||||||
merged_images = []
|
merged_images = []
|
||||||
chunk_limit = max(0, int(parser_config.get("chunk_token_num", 128)))
|
chunk_limit = max(0, int(parser_config.get("chunk_token_num", 128)))
|
||||||
|
|
@ -900,13 +899,11 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
|
|
||||||
chunks = merged_chunks
|
chunks = merged_chunks
|
||||||
has_images = merged_images and any(img is not None for img in merged_images)
|
has_images = merged_images and any(img is not None for img in merged_images)
|
||||||
if kwargs.get("section_only", False):
|
|
||||||
chunks.extend(embed_res)
|
|
||||||
return chunks
|
|
||||||
if has_images:
|
if has_images:
|
||||||
res.extend(tokenize_chunks_with_images(chunks, doc, is_english, merged_images))
|
res.extend(tokenize_chunks_with_images(chunks, doc, is_english, merged_images, child_delimiters_pattern=child_deli))
|
||||||
else:
|
else:
|
||||||
res.extend(tokenize_chunks(chunks, doc, is_english, pdf_parser))
|
res.extend(tokenize_chunks(chunks, doc, is_english, pdf_parser, child_delimiters_pattern=child_deli))
|
||||||
else:
|
else:
|
||||||
if section_images:
|
if section_images:
|
||||||
if all(image is None for image in section_images):
|
if all(image is None for image in section_images):
|
||||||
|
|
@ -917,21 +914,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||||
int(parser_config.get(
|
int(parser_config.get(
|
||||||
"chunk_token_num", 128)), parser_config.get(
|
"chunk_token_num", 128)), parser_config.get(
|
||||||
"delimiter", "\n!?。;!?"))
|
"delimiter", "\n!?。;!?"))
|
||||||
if kwargs.get("section_only", False):
|
res.extend(tokenize_chunks_with_images(chunks, doc, is_english, images, child_delimiters_pattern=child_deli))
|
||||||
chunks.extend(embed_res)
|
|
||||||
return chunks
|
|
||||||
|
|
||||||
res.extend(tokenize_chunks_with_images(chunks, doc, is_english, images))
|
|
||||||
else:
|
else:
|
||||||
chunks = naive_merge(
|
chunks = naive_merge(
|
||||||
sections, int(parser_config.get(
|
sections, int(parser_config.get(
|
||||||
"chunk_token_num", 128)), parser_config.get(
|
"chunk_token_num", 128)), parser_config.get(
|
||||||
"delimiter", "\n!?。;!?"))
|
"delimiter", "\n!?。;!?"))
|
||||||
if kwargs.get("section_only", False):
|
|
||||||
chunks.extend(embed_res)
|
|
||||||
return chunks
|
|
||||||
|
|
||||||
res.extend(tokenize_chunks(chunks, doc, is_english, pdf_parser))
|
res.extend(tokenize_chunks(chunks, doc, is_english, pdf_parser, child_delimiters_pattern=child_deli))
|
||||||
|
|
||||||
if urls and parser_config.get("analyze_hyperlink", False) and is_root:
|
if urls and parser_config.get("analyze_hyperlink", False) and is_root:
|
||||||
for index, url in enumerate(urls):
|
for index, url in enumerate(urls):
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,10 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
|
from copy import deepcopy
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
import trio
|
import trio
|
||||||
|
|
||||||
from common.misc_utils import get_uuid
|
from common.misc_utils import get_uuid
|
||||||
from rag.utils.base64_image import id2image, image2id
|
from rag.utils.base64_image import id2image, image2id
|
||||||
from deepdoc.parser.pdf_parser import RAGFlowPdfParser
|
from deepdoc.parser.pdf_parser import RAGFlowPdfParser
|
||||||
|
|
@ -32,6 +32,7 @@ class SplitterParam(ProcessParamBase):
|
||||||
self.chunk_token_size = 512
|
self.chunk_token_size = 512
|
||||||
self.delimiters = ["\n"]
|
self.delimiters = ["\n"]
|
||||||
self.overlapped_percent = 0
|
self.overlapped_percent = 0
|
||||||
|
self.children_delimiters = []
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
self.check_empty(self.delimiters, "Delimiters.")
|
self.check_empty(self.delimiters, "Delimiters.")
|
||||||
|
|
@ -58,6 +59,14 @@ class Splitter(ProcessBase):
|
||||||
deli += f"`{d}`"
|
deli += f"`{d}`"
|
||||||
else:
|
else:
|
||||||
deli += d
|
deli += d
|
||||||
|
child_deli = ""
|
||||||
|
for d in self._param.children_delimiters:
|
||||||
|
if len(d) > 1:
|
||||||
|
child_deli += f"`{d}`"
|
||||||
|
else:
|
||||||
|
child_deli += d
|
||||||
|
child_deli = [m.group(1) for m in re.finditer(r"`([^`]+)`", child_deli)]
|
||||||
|
custom_pattern = "|".join(re.escape(t) for t in sorted(set(child_deli), key=len, reverse=True))
|
||||||
|
|
||||||
self.set_output("output_format", "chunks")
|
self.set_output("output_format", "chunks")
|
||||||
self.callback(random.randint(1, 5) / 100.0, "Start to split into chunks.")
|
self.callback(random.randint(1, 5) / 100.0, "Start to split into chunks.")
|
||||||
|
|
@ -78,7 +87,23 @@ class Splitter(ProcessBase):
|
||||||
deli,
|
deli,
|
||||||
self._param.overlapped_percent,
|
self._param.overlapped_percent,
|
||||||
)
|
)
|
||||||
self.set_output("chunks", [{"text": c.strip()} for c in cks if c.strip()])
|
if custom_pattern:
|
||||||
|
docs = []
|
||||||
|
for c in cks:
|
||||||
|
if not c.strip():
|
||||||
|
continue
|
||||||
|
split_sec = re.split(r"(%s)" % custom_pattern, c, flags=re.DOTALL)
|
||||||
|
if split_sec:
|
||||||
|
for txt in split_sec:
|
||||||
|
docs.append({
|
||||||
|
"text": txt,
|
||||||
|
"mom": c
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
docs.append({"text": c})
|
||||||
|
self.set_output("chunks", docs)
|
||||||
|
else:
|
||||||
|
self.set_output("chunks", [{"text": c.strip()} for c in cks if c.strip()])
|
||||||
|
|
||||||
self.callback(1, "Done.")
|
self.callback(1, "Done.")
|
||||||
return
|
return
|
||||||
|
|
@ -100,12 +125,27 @@ class Splitter(ProcessBase):
|
||||||
{
|
{
|
||||||
"text": RAGFlowPdfParser.remove_tag(c),
|
"text": RAGFlowPdfParser.remove_tag(c),
|
||||||
"image": img,
|
"image": img,
|
||||||
"positions": [[pos[0][-1]+1, *pos[1:]] for pos in RAGFlowPdfParser.extract_positions(c)],
|
"positions": [[pos[0][-1]+1, *pos[1:]] for pos in RAGFlowPdfParser.extract_positions(c)]
|
||||||
}
|
}
|
||||||
for c, img in zip(chunks, images) if c.strip()
|
for c, img in zip(chunks, images) if c.strip()
|
||||||
]
|
]
|
||||||
async with trio.open_nursery() as nursery:
|
async with trio.open_nursery() as nursery:
|
||||||
for d in cks:
|
for d in cks:
|
||||||
nursery.start_soon(image2id, d, partial(settings.STORAGE_IMPL.put, tenant_id=self._canvas._tenant_id), get_uuid())
|
nursery.start_soon(image2id, d, partial(settings.STORAGE_IMPL.put, tenant_id=self._canvas._tenant_id), get_uuid())
|
||||||
self.set_output("chunks", cks)
|
|
||||||
|
if custom_pattern:
|
||||||
|
docs = []
|
||||||
|
for c in cks:
|
||||||
|
split_sec = re.split(r"(%s)" % custom_pattern, c["text"], flags=re.DOTALL)
|
||||||
|
if split_sec:
|
||||||
|
c["mom"] = c["text"]
|
||||||
|
for txt in split_sec:
|
||||||
|
cc = deepcopy(c)
|
||||||
|
cc["text"] = txt
|
||||||
|
docs.append(cc)
|
||||||
|
else:
|
||||||
|
docs.append(c)
|
||||||
|
self.set_output("chunks", docs)
|
||||||
|
else:
|
||||||
|
self.set_output("chunks", cks)
|
||||||
self.callback(1, "Done.")
|
self.callback(1, "Done.")
|
||||||
|
|
|
||||||
|
|
@ -264,14 +264,14 @@ def is_chinese(text):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def tokenize(d, t, eng):
|
def tokenize(d, txt, eng):
|
||||||
d["content_with_weight"] = t
|
d["content_with_weight"] = txt
|
||||||
t = re.sub(r"</?(table|td|caption|tr|th)( [^<>]{0,12})?>", " ", t)
|
t = re.sub(r"</?(table|td|caption|tr|th)( [^<>]{0,12})?>", " ", txt)
|
||||||
d["content_ltks"] = rag_tokenizer.tokenize(t)
|
d["content_ltks"] = rag_tokenizer.tokenize(t)
|
||||||
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
||||||
|
|
||||||
|
|
||||||
def tokenize_chunks(chunks, doc, eng, pdf_parser=None):
|
def tokenize_chunks(chunks, doc, eng, pdf_parser=None, child_delimiters_pattern=None):
|
||||||
res = []
|
res = []
|
||||||
# wrap up as es documents
|
# wrap up as es documents
|
||||||
for ii, ck in enumerate(chunks):
|
for ii, ck in enumerate(chunks):
|
||||||
|
|
@ -288,12 +288,21 @@ def tokenize_chunks(chunks, doc, eng, pdf_parser=None):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
add_positions(d, [[ii]*5])
|
add_positions(d, [[ii]*5])
|
||||||
|
|
||||||
|
if child_delimiters_pattern:
|
||||||
|
d["mom_with_weight"] = ck
|
||||||
|
for txt in re.split(r"(%s)" % child_delimiters_pattern, ck, flags=re.DOTALL):
|
||||||
|
dd = copy.deepcopy(d)
|
||||||
|
tokenize(dd, txt, eng)
|
||||||
|
res.append(dd)
|
||||||
|
continue
|
||||||
|
|
||||||
tokenize(d, ck, eng)
|
tokenize(d, ck, eng)
|
||||||
res.append(d)
|
res.append(d)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
def tokenize_chunks_with_images(chunks, doc, eng, images):
|
def tokenize_chunks_with_images(chunks, doc, eng, images, child_delimiters_pattern=None):
|
||||||
res = []
|
res = []
|
||||||
# wrap up as es documents
|
# wrap up as es documents
|
||||||
for ii, (ck, image) in enumerate(zip(chunks, images)):
|
for ii, (ck, image) in enumerate(zip(chunks, images)):
|
||||||
|
|
@ -303,6 +312,13 @@ def tokenize_chunks_with_images(chunks, doc, eng, images):
|
||||||
d = copy.deepcopy(doc)
|
d = copy.deepcopy(doc)
|
||||||
d["image"] = image
|
d["image"] = image
|
||||||
add_positions(d, [[ii]*5])
|
add_positions(d, [[ii]*5])
|
||||||
|
if child_delimiters_pattern:
|
||||||
|
d["mom_with_weight"] = ck
|
||||||
|
for txt in re.split(r"(%s)" % child_delimiters_pattern, ck, flags=re.DOTALL):
|
||||||
|
dd = copy.deepcopy(d)
|
||||||
|
tokenize(dd, txt, eng)
|
||||||
|
res.append(dd)
|
||||||
|
continue
|
||||||
tokenize(d, ck, eng)
|
tokenize(d, ck, eng)
|
||||||
res.append(d)
|
res.append(d)
|
||||||
return res
|
return res
|
||||||
|
|
|
||||||
|
|
@ -128,9 +128,6 @@ def signal_handler(sig, frame):
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def set_progress(task_id, from_page=0, to_page=-1, prog=None, msg="Processing..."):
|
def set_progress(task_id, from_page=0, to_page=-1, prog=None, msg="Processing..."):
|
||||||
try:
|
try:
|
||||||
if prog is not None and prog < 0:
|
if prog is not None and prog < 0:
|
||||||
|
|
@ -720,6 +717,34 @@ async def delete_image(kb_id, chunk_id):
|
||||||
|
|
||||||
|
|
||||||
async def insert_es(task_id, task_tenant_id, task_dataset_id, chunks, progress_callback):
|
async def insert_es(task_id, task_tenant_id, task_dataset_id, chunks, progress_callback):
|
||||||
|
mothers = []
|
||||||
|
mother_ids = set([])
|
||||||
|
for ck in chunks:
|
||||||
|
mom = ck.get("mom") or ck.get("mom_with_weight") or ""
|
||||||
|
if not mom:
|
||||||
|
continue
|
||||||
|
id = xxhash.xxh64(mom.encode("utf-8")).hexdigest()
|
||||||
|
if id in mother_ids:
|
||||||
|
continue
|
||||||
|
mother_ids.add(id)
|
||||||
|
ck["mom_id"] = id
|
||||||
|
mom_ck = copy.deepcopy(ck)
|
||||||
|
mom_ck["id"] = id
|
||||||
|
mom_ck["content_with_weight"] = mom
|
||||||
|
mom_ck["available_int"] = 0
|
||||||
|
flds = list(mom_ck.keys())
|
||||||
|
for fld in flds:
|
||||||
|
if fld not in ["id", "content_with_weight", "doc_id", "kb_id", "available_int"]:
|
||||||
|
del mom_ck[fld]
|
||||||
|
mothers.append(mom_ck)
|
||||||
|
|
||||||
|
for b in range(0, len(mothers), settings.DOC_BULK_SIZE):
|
||||||
|
await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(mothers[b:b + settings.DOC_BULK_SIZE], search.index_name(task_tenant_id), task_dataset_id))
|
||||||
|
task_canceled = has_canceled(task_id)
|
||||||
|
if task_canceled:
|
||||||
|
progress_callback(-1, msg="Task has been canceled.")
|
||||||
|
return False
|
||||||
|
|
||||||
for b in range(0, len(chunks), settings.DOC_BULK_SIZE):
|
for b in range(0, len(chunks), settings.DOC_BULK_SIZE):
|
||||||
doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b:b + settings.DOC_BULK_SIZE], search.index_name(task_tenant_id), task_dataset_id))
|
doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b:b + settings.DOC_BULK_SIZE], search.index_name(task_tenant_id), task_dataset_id))
|
||||||
task_canceled = has_canceled(task_id)
|
task_canceled = has_canceled(task_id)
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
import PdfDrawer from '@/components/pdf-drawer';
|
import PdfSheet from '@/components/pdf-drawer';
|
||||||
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
||||||
import { MessageType } from '@/constants/chat';
|
import { MessageType, SharedFrom } from '@/constants/chat';
|
||||||
|
import { useFetchExternalAgentInputs } from '@/hooks/use-agent-request';
|
||||||
import { useFetchExternalChatInfo } from '@/hooks/use-chat-request';
|
import { useFetchExternalChatInfo } from '@/hooks/use-chat-request';
|
||||||
import i18n from '@/locales/config';
|
import i18n from '@/locales/config';
|
||||||
|
import { useSendNextSharedMessage } from '@/pages/agent/hooks/use-send-shared-message';
|
||||||
import { MessageCircle, Minimize2, Send, X } from 'lucide-react';
|
import { MessageCircle, Minimize2, Send, X } from 'lucide-react';
|
||||||
import React, { useCallback, useEffect, useRef, useState } from 'react';
|
import React, { useCallback, useEffect, useRef, useState } from 'react';
|
||||||
import {
|
import {
|
||||||
|
|
@ -20,7 +22,13 @@ const FloatingChatWidget = () => {
|
||||||
const [isLoaded, setIsLoaded] = useState(false);
|
const [isLoaded, setIsLoaded] = useState(false);
|
||||||
const messagesEndRef = useRef<HTMLDivElement>(null);
|
const messagesEndRef = useRef<HTMLDivElement>(null);
|
||||||
|
|
||||||
const { sharedId: conversationId, locale } = useGetSharedChatSearchParams();
|
const {
|
||||||
|
sharedId: conversationId,
|
||||||
|
locale,
|
||||||
|
from,
|
||||||
|
} = useGetSharedChatSearchParams();
|
||||||
|
|
||||||
|
const isFromAgent = from === SharedFrom.Agent;
|
||||||
|
|
||||||
// Check if we're in button-only mode or window-only mode
|
// Check if we're in button-only mode or window-only mode
|
||||||
const urlParams = new URLSearchParams(window.location.search);
|
const urlParams = new URLSearchParams(window.location.search);
|
||||||
|
|
@ -34,7 +42,7 @@ const FloatingChatWidget = () => {
|
||||||
sendLoading,
|
sendLoading,
|
||||||
derivedMessages,
|
derivedMessages,
|
||||||
hasError,
|
hasError,
|
||||||
} = useSendSharedMessage();
|
} = (isFromAgent ? useSendNextSharedMessage : useSendSharedMessage)(() => {});
|
||||||
|
|
||||||
// Sync our local input with the hook's value when needed
|
// Sync our local input with the hook's value when needed
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
|
@ -43,7 +51,11 @@ const FloatingChatWidget = () => {
|
||||||
}
|
}
|
||||||
}, [hookValue, inputValue]);
|
}, [hookValue, inputValue]);
|
||||||
|
|
||||||
const { data: chatInfo } = useFetchExternalChatInfo();
|
const { data } = (
|
||||||
|
isFromAgent ? useFetchExternalAgentInputs : useFetchExternalChatInfo
|
||||||
|
)();
|
||||||
|
|
||||||
|
const title = data.title;
|
||||||
|
|
||||||
const { visible, hideModal, documentId, selectedChunk, clickDocumentButton } =
|
const { visible, hideModal, documentId, selectedChunk, clickDocumentButton } =
|
||||||
useClickDrawer();
|
useClickDrawer();
|
||||||
|
|
@ -372,7 +384,7 @@ const FloatingChatWidget = () => {
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<h3 className="font-semibold text-sm">
|
<h3 className="font-semibold text-sm">
|
||||||
{chatInfo?.title || 'Chat Support'}
|
{title || 'Chat Support'}
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-xs text-blue-100">
|
<p className="text-xs text-blue-100">
|
||||||
We typically reply instantly
|
We typically reply instantly
|
||||||
|
|
@ -494,14 +506,16 @@ const FloatingChatWidget = () => {
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<PdfDrawer
|
{visible && (
|
||||||
visible={visible}
|
<PdfSheet
|
||||||
hideModal={hideModal}
|
visible={visible}
|
||||||
documentId={documentId}
|
hideModal={hideModal}
|
||||||
chunk={selectedChunk}
|
documentId={documentId}
|
||||||
width={'100vw'}
|
chunk={selectedChunk}
|
||||||
height={'100vh'}
|
width={'100vw'}
|
||||||
/>
|
height={'100vh'}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
} // Full mode - render everything together (original behavior)
|
} // Full mode - render everything together (original behavior)
|
||||||
|
|
@ -524,7 +538,7 @@ const FloatingChatWidget = () => {
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<h3 className="font-semibold text-sm">
|
<h3 className="font-semibold text-sm">
|
||||||
{chatInfo?.title || 'Chat Support'}
|
{title || 'Chat Support'}
|
||||||
</h3>
|
</h3>
|
||||||
<p className="text-xs text-blue-100">
|
<p className="text-xs text-blue-100">
|
||||||
We typically reply instantly
|
We typically reply instantly
|
||||||
|
|
@ -695,7 +709,7 @@ const FloatingChatWidget = () => {
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
<PdfDrawer
|
<PdfSheet
|
||||||
visible={visible}
|
visible={visible}
|
||||||
hideModal={hideModal}
|
hideModal={hideModal}
|
||||||
documentId={documentId}
|
documentId={documentId}
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,6 @@ import Image from '@/components/image';
|
||||||
import SvgIcon from '@/components/svg-icon';
|
import SvgIcon from '@/components/svg-icon';
|
||||||
import { IReference, IReferenceChunk } from '@/interfaces/database/chat';
|
import { IReference, IReferenceChunk } from '@/interfaces/database/chat';
|
||||||
import { getExtension } from '@/utils/document-util';
|
import { getExtension } from '@/utils/document-util';
|
||||||
import { InfoCircleOutlined } from '@ant-design/icons';
|
|
||||||
import { Button, Flex, Popover } from 'antd';
|
|
||||||
import DOMPurify from 'dompurify';
|
import DOMPurify from 'dompurify';
|
||||||
import { useCallback, useEffect, useMemo } from 'react';
|
import { useCallback, useEffect, useMemo } from 'react';
|
||||||
import Markdown from 'react-markdown';
|
import Markdown from 'react-markdown';
|
||||||
|
|
@ -27,10 +25,16 @@ import {
|
||||||
replaceThinkToSection,
|
replaceThinkToSection,
|
||||||
showImage,
|
showImage,
|
||||||
} from '@/utils/chat';
|
} from '@/utils/chat';
|
||||||
|
|
||||||
import classNames from 'classnames';
|
import classNames from 'classnames';
|
||||||
import { omit } from 'lodash';
|
import { omit } from 'lodash';
|
||||||
import { pipe } from 'lodash/fp';
|
import { pipe } from 'lodash/fp';
|
||||||
|
import { CircleAlert } from 'lucide-react';
|
||||||
|
import { Button } from '../ui/button';
|
||||||
|
import {
|
||||||
|
HoverCard,
|
||||||
|
HoverCardContent,
|
||||||
|
HoverCardTrigger,
|
||||||
|
} from '../ui/hover-card';
|
||||||
import styles from './index.less';
|
import styles from './index.less';
|
||||||
|
|
||||||
const getChunkIndex = (match: string) => Number(match);
|
const getChunkIndex = (match: string) => Number(match);
|
||||||
|
|
@ -145,20 +149,20 @@ const MarkdownContent = ({
|
||||||
return (
|
return (
|
||||||
<div key={chunkItem?.id} className="flex gap-2">
|
<div key={chunkItem?.id} className="flex gap-2">
|
||||||
{imageId && (
|
{imageId && (
|
||||||
<Popover
|
<HoverCard>
|
||||||
placement="left"
|
<HoverCardTrigger>
|
||||||
content={
|
<Image
|
||||||
|
id={imageId}
|
||||||
|
className={styles.referenceChunkImage}
|
||||||
|
></Image>
|
||||||
|
</HoverCardTrigger>
|
||||||
|
<HoverCardContent>
|
||||||
<Image
|
<Image
|
||||||
id={imageId}
|
id={imageId}
|
||||||
className={styles.referenceImagePreview}
|
className={styles.referenceImagePreview}
|
||||||
></Image>
|
></Image>
|
||||||
}
|
</HoverCardContent>
|
||||||
>
|
</HoverCard>
|
||||||
<Image
|
|
||||||
id={imageId}
|
|
||||||
className={styles.referenceChunkImage}
|
|
||||||
></Image>
|
|
||||||
</Popover>
|
|
||||||
)}
|
)}
|
||||||
<div className={'space-y-2 max-w-[40vw]'}>
|
<div className={'space-y-2 max-w-[40vw]'}>
|
||||||
<div
|
<div
|
||||||
|
|
@ -168,7 +172,7 @@ const MarkdownContent = ({
|
||||||
className={classNames(styles.chunkContentText)}
|
className={classNames(styles.chunkContentText)}
|
||||||
></div>
|
></div>
|
||||||
{documentId && (
|
{documentId && (
|
||||||
<Flex gap={'small'}>
|
<section className="flex gap-1">
|
||||||
{fileThumbnail ? (
|
{fileThumbnail ? (
|
||||||
<img
|
<img
|
||||||
src={fileThumbnail}
|
src={fileThumbnail}
|
||||||
|
|
@ -182,8 +186,8 @@ const MarkdownContent = ({
|
||||||
></SvgIcon>
|
></SvgIcon>
|
||||||
)}
|
)}
|
||||||
<Button
|
<Button
|
||||||
type="link"
|
variant="link"
|
||||||
className={classNames(styles.documentLink, 'text-wrap')}
|
className={'text-wrap p-0'}
|
||||||
onClick={handleDocumentButtonClick(
|
onClick={handleDocumentButtonClick(
|
||||||
documentId,
|
documentId,
|
||||||
chunkItem,
|
chunkItem,
|
||||||
|
|
@ -193,7 +197,7 @@ const MarkdownContent = ({
|
||||||
>
|
>
|
||||||
{document?.doc_name}
|
{document?.doc_name}
|
||||||
</Button>
|
</Button>
|
||||||
</Flex>
|
</section>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -228,9 +232,14 @@ const MarkdownContent = ({
|
||||||
}
|
}
|
||||||
></Image>
|
></Image>
|
||||||
) : (
|
) : (
|
||||||
<Popover content={getPopoverContent(chunkIndex)} key={i}>
|
<HoverCard key={i}>
|
||||||
<InfoCircleOutlined className={styles.referenceIcon} />
|
<HoverCardTrigger>
|
||||||
</Popover>
|
<CircleAlert className="size-4 inline-block" />
|
||||||
|
</HoverCardTrigger>
|
||||||
|
<HoverCardContent className="max-w-3xl">
|
||||||
|
{getPopoverContent(chunkIndex)}
|
||||||
|
</HoverCardContent>
|
||||||
|
</HoverCard>
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,10 @@ import {
|
||||||
} from '@/hooks/document-hooks';
|
} from '@/hooks/document-hooks';
|
||||||
import { IRegenerateMessage, IRemoveMessageById } from '@/hooks/logic-hooks';
|
import { IRegenerateMessage, IRemoveMessageById } from '@/hooks/logic-hooks';
|
||||||
import { cn } from '@/lib/utils';
|
import { cn } from '@/lib/utils';
|
||||||
import { Avatar, Flex, Space } from 'antd';
|
|
||||||
import MarkdownContent from '../markdown-content';
|
import MarkdownContent from '../markdown-content';
|
||||||
import { ReferenceDocumentList } from '../next-message-item/reference-document-list';
|
import { ReferenceDocumentList } from '../next-message-item/reference-document-list';
|
||||||
import { InnerUploadedMessageFiles } from '../next-message-item/uploaded-message-files';
|
import { InnerUploadedMessageFiles } from '../next-message-item/uploaded-message-files';
|
||||||
|
import { RAGFlowAvatar } from '../ragflow-avatar';
|
||||||
import { useTheme } from '../theme-provider';
|
import { useTheme } from '../theme-provider';
|
||||||
import { AssistantGroupButton, UserGroupButton } from './group-button';
|
import { AssistantGroupButton, UserGroupButton } from './group-button';
|
||||||
import styles from './index.less';
|
import styles from './index.less';
|
||||||
|
|
@ -98,40 +98,43 @@ const MessageItem = ({
|
||||||
>
|
>
|
||||||
{visibleAvatar &&
|
{visibleAvatar &&
|
||||||
(item.role === MessageType.User ? (
|
(item.role === MessageType.User ? (
|
||||||
<Avatar size={40} src={avatar ?? '/logo.svg'} />
|
<RAGFlowAvatar
|
||||||
|
className="size-10"
|
||||||
|
avatar={avatar ?? '/logo.svg'}
|
||||||
|
isPerson
|
||||||
|
/>
|
||||||
) : avatarDialog ? (
|
) : avatarDialog ? (
|
||||||
<Avatar size={40} src={avatarDialog} />
|
<RAGFlowAvatar
|
||||||
|
className="size-10"
|
||||||
|
avatar={avatarDialog}
|
||||||
|
isPerson
|
||||||
|
/>
|
||||||
) : (
|
) : (
|
||||||
<AssistantIcon />
|
<AssistantIcon />
|
||||||
))}
|
))}
|
||||||
|
|
||||||
<Flex vertical gap={8} flex={1}>
|
<section className="flex gap-2 flex-1 flex-col">
|
||||||
<Space>
|
{isAssistant ? (
|
||||||
{isAssistant ? (
|
index !== 0 && (
|
||||||
index !== 0 && (
|
<AssistantGroupButton
|
||||||
<AssistantGroupButton
|
|
||||||
messageId={item.id}
|
|
||||||
content={item.content}
|
|
||||||
prompt={item.prompt}
|
|
||||||
showLikeButton={showLikeButton}
|
|
||||||
audioBinary={item.audio_binary}
|
|
||||||
showLoudspeaker={showLoudspeaker}
|
|
||||||
></AssistantGroupButton>
|
|
||||||
)
|
|
||||||
) : (
|
|
||||||
<UserGroupButton
|
|
||||||
content={item.content}
|
|
||||||
messageId={item.id}
|
messageId={item.id}
|
||||||
removeMessageById={removeMessageById}
|
content={item.content}
|
||||||
regenerateMessage={
|
prompt={item.prompt}
|
||||||
regenerateMessage && handleRegenerateMessage
|
showLikeButton={showLikeButton}
|
||||||
}
|
audioBinary={item.audio_binary}
|
||||||
sendLoading={sendLoading}
|
showLoudspeaker={showLoudspeaker}
|
||||||
></UserGroupButton>
|
></AssistantGroupButton>
|
||||||
)}
|
)
|
||||||
|
) : (
|
||||||
|
<UserGroupButton
|
||||||
|
content={item.content}
|
||||||
|
messageId={item.id}
|
||||||
|
removeMessageById={removeMessageById}
|
||||||
|
regenerateMessage={regenerateMessage && handleRegenerateMessage}
|
||||||
|
sendLoading={sendLoading}
|
||||||
|
></UserGroupButton>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* <b>{isAssistant ? '' : nickname}</b> */}
|
|
||||||
</Space>
|
|
||||||
<div
|
<div
|
||||||
className={cn(
|
className={cn(
|
||||||
isAssistant
|
isAssistant
|
||||||
|
|
@ -159,7 +162,7 @@ const MessageItem = ({
|
||||||
files={documentList}
|
files={documentList}
|
||||||
></InnerUploadedMessageFiles>
|
></InnerUploadedMessageFiles>
|
||||||
)}
|
)}
|
||||||
</Flex>
|
</section>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
import { IModalProps } from '@/interfaces/common';
|
import { IModalProps } from '@/interfaces/common';
|
||||||
import { IReferenceChunk } from '@/interfaces/database/chat';
|
import { IReferenceChunk } from '@/interfaces/database/chat';
|
||||||
import { IChunk } from '@/interfaces/database/knowledge';
|
import { IChunk } from '@/interfaces/database/knowledge';
|
||||||
import { Drawer } from 'antd';
|
import { cn } from '@/lib/utils';
|
||||||
import DocumentPreviewer from '../pdf-previewer';
|
import DocumentPreviewer from '../pdf-previewer';
|
||||||
|
import { Sheet, SheetContent, SheetHeader, SheetTitle } from '../ui/sheet';
|
||||||
|
|
||||||
interface IProps extends IModalProps<any> {
|
interface IProps extends IModalProps<any> {
|
||||||
documentId: string;
|
documentId: string;
|
||||||
|
|
@ -11,7 +12,7 @@ interface IProps extends IModalProps<any> {
|
||||||
height?: string | number;
|
height?: string | number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const PdfDrawer = ({
|
export const PdfSheet = ({
|
||||||
visible = false,
|
visible = false,
|
||||||
hideModal,
|
hideModal,
|
||||||
documentId,
|
documentId,
|
||||||
|
|
@ -20,20 +21,25 @@ export const PdfDrawer = ({
|
||||||
height,
|
height,
|
||||||
}: IProps) => {
|
}: IProps) => {
|
||||||
return (
|
return (
|
||||||
<Drawer
|
<Sheet open onOpenChange={hideModal}>
|
||||||
title="Document Previewer"
|
<SheetContent
|
||||||
onClose={hideModal}
|
className={cn(`max-w-full`)}
|
||||||
open={visible}
|
style={{
|
||||||
width={width}
|
width: width,
|
||||||
height={height}
|
height: height ? height : undefined,
|
||||||
>
|
}}
|
||||||
<DocumentPreviewer
|
>
|
||||||
documentId={documentId}
|
<SheetHeader>
|
||||||
chunk={chunk}
|
<SheetTitle>Document Previewer</SheetTitle>
|
||||||
visible={visible}
|
</SheetHeader>
|
||||||
></DocumentPreviewer>
|
<DocumentPreviewer
|
||||||
</Drawer>
|
documentId={documentId}
|
||||||
|
chunk={chunk}
|
||||||
|
visible={visible}
|
||||||
|
></DocumentPreviewer>
|
||||||
|
</SheetContent>
|
||||||
|
</Sheet>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
export default PdfDrawer;
|
export default PdfSheet;
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import { useSendAgentMessage } from './use-send-agent-message';
|
||||||
import { FileUploadProps } from '@/components/file-upload';
|
import { FileUploadProps } from '@/components/file-upload';
|
||||||
import { NextMessageInput } from '@/components/message-input/next';
|
import { NextMessageInput } from '@/components/message-input/next';
|
||||||
import MessageItem from '@/components/next-message-item';
|
import MessageItem from '@/components/next-message-item';
|
||||||
import PdfDrawer from '@/components/pdf-drawer';
|
import PdfSheet from '@/components/pdf-drawer';
|
||||||
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
||||||
import {
|
import {
|
||||||
useFetchAgent,
|
useFetchAgent,
|
||||||
|
|
@ -127,12 +127,14 @@ function AgentChatBox() {
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
</section>
|
</section>
|
||||||
<PdfDrawer
|
{visible && (
|
||||||
visible={visible}
|
<PdfSheet
|
||||||
hideModal={hideModal}
|
visible={visible}
|
||||||
documentId={documentId}
|
hideModal={hideModal}
|
||||||
chunk={selectedChunk}
|
documentId={documentId}
|
||||||
></PdfDrawer>
|
chunk={selectedChunk}
|
||||||
|
></PdfSheet>
|
||||||
|
)}
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ import { EmbedContainer } from '@/components/embed-container';
|
||||||
import { FileUploadProps } from '@/components/file-upload';
|
import { FileUploadProps } from '@/components/file-upload';
|
||||||
import { NextMessageInput } from '@/components/message-input/next';
|
import { NextMessageInput } from '@/components/message-input/next';
|
||||||
import MessageItem from '@/components/next-message-item';
|
import MessageItem from '@/components/next-message-item';
|
||||||
import PdfDrawer from '@/components/pdf-drawer';
|
import PdfSheet from '@/components/pdf-drawer';
|
||||||
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
||||||
import { MessageType } from '@/constants/chat';
|
import { MessageType } from '@/constants/chat';
|
||||||
import { useUploadCanvasFileWithProgress } from '@/hooks/use-agent-request';
|
import { useUploadCanvasFileWithProgress } from '@/hooks/use-agent-request';
|
||||||
|
|
@ -204,12 +204,12 @@ const ChatContainer = () => {
|
||||||
</div>
|
</div>
|
||||||
</EmbedContainer>
|
</EmbedContainer>
|
||||||
{visible && (
|
{visible && (
|
||||||
<PdfDrawer
|
<PdfSheet
|
||||||
visible={visible}
|
visible={visible}
|
||||||
hideModal={hideModal}
|
hideModal={hideModal}
|
||||||
documentId={documentId}
|
documentId={documentId}
|
||||||
chunk={selectedChunk}
|
chunk={selectedChunk}
|
||||||
></PdfDrawer>
|
></PdfSheet>
|
||||||
)}
|
)}
|
||||||
{parameterDialogVisible && (
|
{parameterDialogVisible && (
|
||||||
<ParameterDialog
|
<ParameterDialog
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ import { LargeModelFormFieldWithoutFilter } from '@/components/large-model-form-
|
||||||
import { LlmSettingSchema } from '@/components/llm-setting-items/next';
|
import { LlmSettingSchema } from '@/components/llm-setting-items/next';
|
||||||
import { NextMessageInput } from '@/components/message-input/next';
|
import { NextMessageInput } from '@/components/message-input/next';
|
||||||
import MessageItem from '@/components/message-item';
|
import MessageItem from '@/components/message-item';
|
||||||
import PdfDrawer from '@/components/pdf-drawer';
|
import PdfSheet from '@/components/pdf-drawer';
|
||||||
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
||||||
import { Button } from '@/components/ui/button';
|
import { Button } from '@/components/ui/button';
|
||||||
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
|
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
|
||||||
|
|
@ -257,12 +257,12 @@ export function MultipleChatBox({
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
{visible && (
|
{visible && (
|
||||||
<PdfDrawer
|
<PdfSheet
|
||||||
visible={visible}
|
visible={visible}
|
||||||
hideModal={hideModal}
|
hideModal={hideModal}
|
||||||
documentId={documentId}
|
documentId={documentId}
|
||||||
chunk={selectedChunk}
|
chunk={selectedChunk}
|
||||||
></PdfDrawer>
|
></PdfSheet>
|
||||||
)}
|
)}
|
||||||
</section>
|
</section>
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import { NextMessageInput } from '@/components/message-input/next';
|
import { NextMessageInput } from '@/components/message-input/next';
|
||||||
import MessageItem from '@/components/message-item';
|
import MessageItem from '@/components/message-item';
|
||||||
import PdfDrawer from '@/components/pdf-drawer';
|
import PdfSheet from '@/components/pdf-drawer';
|
||||||
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
||||||
import { MessageType } from '@/constants/chat';
|
import { MessageType } from '@/constants/chat';
|
||||||
import {
|
import {
|
||||||
|
|
@ -101,12 +101,12 @@ export function SingleChatBox({ controller, stopOutputMessage }: IProps) {
|
||||||
removeFile={removeFile}
|
removeFile={removeFile}
|
||||||
/>
|
/>
|
||||||
{visible && (
|
{visible && (
|
||||||
<PdfDrawer
|
<PdfSheet
|
||||||
visible={visible}
|
visible={visible}
|
||||||
hideModal={hideModal}
|
hideModal={hideModal}
|
||||||
documentId={documentId}
|
documentId={documentId}
|
||||||
chunk={selectedChunk}
|
chunk={selectedChunk}
|
||||||
></PdfDrawer>
|
></PdfSheet>
|
||||||
)}
|
)}
|
||||||
</section>
|
</section>
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import { EmbedContainer } from '@/components/embed-container';
|
import { EmbedContainer } from '@/components/embed-container';
|
||||||
import { NextMessageInput } from '@/components/message-input/next';
|
import { NextMessageInput } from '@/components/message-input/next';
|
||||||
import MessageItem from '@/components/message-item';
|
import MessageItem from '@/components/message-item';
|
||||||
import PdfDrawer from '@/components/pdf-drawer';
|
import PdfSheet from '@/components/pdf-drawer';
|
||||||
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
||||||
import { MessageType, SharedFrom } from '@/constants/chat';
|
import { MessageType, SharedFrom } from '@/constants/chat';
|
||||||
import { useFetchNextConversationSSE } from '@/hooks/chat-hooks';
|
import { useFetchNextConversationSSE } from '@/hooks/chat-hooks';
|
||||||
|
|
@ -123,12 +123,12 @@ const ChatContainer = () => {
|
||||||
</div>
|
</div>
|
||||||
</EmbedContainer>
|
</EmbedContainer>
|
||||||
{visible && (
|
{visible && (
|
||||||
<PdfDrawer
|
<PdfSheet
|
||||||
visible={visible}
|
visible={visible}
|
||||||
hideModal={hideModal}
|
hideModal={hideModal}
|
||||||
documentId={documentId}
|
documentId={documentId}
|
||||||
chunk={selectedChunk}
|
chunk={selectedChunk}
|
||||||
></PdfDrawer>
|
></PdfSheet>
|
||||||
)}
|
)}
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue