diff --git a/agent/component/iteration.py b/agent/component/iteration.py index cff09d622..ae5c0b677 100644 --- a/agent/component/iteration.py +++ b/agent/component/iteration.py @@ -32,7 +32,7 @@ class IterationParam(ComponentParamBase): def __init__(self): super().__init__() self.items_ref = "" - self.veriable={} + self.variable={} def get_input_form(self) -> dict[str, dict]: return { diff --git a/api/apps/__init__.py b/api/apps/__init__.py index a53f67c06..a6e33c13b 100644 --- a/api/apps/__init__.py +++ b/api/apps/__init__.py @@ -24,7 +24,7 @@ from flasgger import Swagger from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer from quart_cors import cors from common.constants import StatusEnum -from api.db.db_models import close_connection +from api.db.db_models import close_connection, APIToken from api.db.services import UserService from api.utils.json_encode import CustomJSONEncoder from api.utils import commands @@ -124,6 +124,10 @@ def _load_user(): user = UserService.query( access_token=access_token, status=StatusEnum.VALID.value ) + if not user and len(authorization.split()) == 2: + objs = APIToken.query(token=authorization.split()[1]) + if objs: + user = UserService.query(id=objs[0].tenant_id, status=StatusEnum.VALID.value) if user: if not user[0].access_token or not user[0].access_token.strip(): logging.warning(f"User {user[0].email} has empty access_token in database") diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 30fbd835e..52acebc43 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -1434,6 +1434,7 @@ async def retrieval_test(tenant_id): question = req["question"] doc_ids = req.get("document_ids", []) use_kg = req.get("use_kg", False) + toc_enhance = req.get("toc_enhance", False) langs = req.get("cross_languages", []) if not isinstance(doc_ids, list): return get_error_data_result("`documents` should be a list") @@ -1487,6 +1488,11 @@ async def retrieval_test(tenant_id): highlight=highlight, 
rank_feature=label_question(question, kbs), ) + if toc_enhance: + chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT) + cks = settings.retriever.retrieval_by_toc(question, ranks["chunks"], tenant_ids, chat_mdl, size) + if cks: + ranks["chunks"] = cks if use_kg: ck = settings.kg_retriever.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT)) if ck["content_with_weight"]: diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index bc1b15670..253745432 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -2072,6 +2072,7 @@ Retrieves chunks from specified datasets. - `"cross_languages"`: `list[string]` - `"metadata_condition"`: `object` - `"use_kg"`: `boolean` + - `"toc_enhance"`: `boolean` ##### Request example ```bash @@ -2122,6 +2123,8 @@ curl --request POST \ The number of chunks engaged in vector cosine computation. Defaults to `1024`. - `"use_kg"`: (*Body parameter*), `boolean` The search includes text chunks related to the knowledge graph of the selected dataset to handle complex multi-hop queries. Defaults to `False`. +- `"toc_enhance"`: (*Body parameter*), `boolean` + The search applies table-of-contents enhancement to boost the rank of relevant chunks. Requires files that were parsed with `TOC Enhance` enabled. Defaults to `False`. - `"rerank_id"`: (*Body parameter*), `integer` The ID of the rerank model. - `"keyword"`: (*Body parameter*), `boolean` @@ -2136,6 +2139,9 @@ curl --request POST \ The languages that should be translated into, in order to achieve keywords retrievals in different languages. - `"metadata_condition"`: (*Body parameter*), `object` The metadata condition used for filtering chunks: + - `"logic"`: (*Body parameter*), `string` + - `"and"`: Intersection of the results of all conditions (default). + - `"or"`: Union of the results of all conditions. 
- `"conditions"`: (*Body parameter*), `array` A list of metadata filter conditions. - `"name"`: `string` - The metadata field name to filter by, e.g., `"author"`, `"company"`, `"url"`. Ensure this parameter before use. See [Set metadata](../guides/dataset/set_metadata.md) for details.