Feat: API supports toc_enhance.
This commit is contained in:
parent
4c8f9f0d77
commit
1d786aef25
4 changed files with 18 additions and 2 deletions
|
|
@ -32,7 +32,7 @@ class IterationParam(ComponentParamBase):
|
|||
def __init__(self):
|
||||
super().__init__()
|
||||
self.items_ref = ""
|
||||
self.veriable={}
|
||||
self.variable={}
|
||||
|
||||
def get_input_form(self) -> dict[str, dict]:
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ from flasgger import Swagger
|
|||
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
||||
from quart_cors import cors
|
||||
from common.constants import StatusEnum
|
||||
from api.db.db_models import close_connection
|
||||
from api.db.db_models import close_connection, APIToken
|
||||
from api.db.services import UserService
|
||||
from api.utils.json_encode import CustomJSONEncoder
|
||||
from api.utils import commands
|
||||
|
|
@ -124,6 +124,10 @@ def _load_user():
|
|||
user = UserService.query(
|
||||
access_token=access_token, status=StatusEnum.VALID.value
|
||||
)
|
||||
if not user and len(authorization.split()) == 2:
|
||||
objs = APIToken.query(token=authorization.split()[1])
|
||||
if objs:
|
||||
user = UserService.query(id=objs[0].tenant_id, status=StatusEnum.VALID.value)
|
||||
if user:
|
||||
if not user[0].access_token or not user[0].access_token.strip():
|
||||
logging.warning(f"User {user[0].email} has empty access_token in database")
|
||||
|
|
|
|||
|
|
@ -1434,6 +1434,7 @@ async def retrieval_test(tenant_id):
|
|||
question = req["question"]
|
||||
doc_ids = req.get("document_ids", [])
|
||||
use_kg = req.get("use_kg", False)
|
||||
toc_enhance = req.get("toc_enhance", False)
|
||||
langs = req.get("cross_languages", [])
|
||||
if not isinstance(doc_ids, list):
|
||||
return get_error_data_result("`documents` should be a list")
|
||||
|
|
@ -1487,6 +1488,11 @@ async def retrieval_test(tenant_id):
|
|||
highlight=highlight,
|
||||
rank_feature=label_question(question, kbs),
|
||||
)
|
||||
if toc_enhance:
|
||||
chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
|
||||
cks = settings.retriever.retrieval_by_toc(question, ranks["chunks"], tenant_ids, chat_mdl, size)
|
||||
if cks:
|
||||
ranks["chunks"] = cks
|
||||
if use_kg:
|
||||
ck = settings.kg_retriever.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT))
|
||||
if ck["content_with_weight"]:
|
||||
|
|
|
|||
|
|
@ -2072,6 +2072,7 @@ Retrieves chunks from specified datasets.
|
|||
- `"cross_languages"`: `list[string]`
|
||||
- `"metadata_condition"`: `object`
|
||||
- `"use_kg"`: `boolean`
|
||||
- `"toc_enhance"`: `boolean`
|
||||
##### Request example
|
||||
|
||||
```bash
|
||||
|
|
@ -2122,6 +2123,8 @@ curl --request POST \
|
|||
The number of chunks engaged in vector cosine computation. Defaults to `1024`.
|
||||
- `"use_kg"`: (*Body parameter*), `boolean`
|
||||
The search includes text chunks related to the knowledge graph of the selected dataset to handle complex multi-hop queries. Defaults to `False`.
|
||||
- `"toc_enhance"`: (*Body parameter*), `boolean`
|
||||
The search includes table of content enhancement in order to boost rank of relevant chunks. Files parsed with `TOC Enhance` enabled is prerequisite. Defaults to `False`.
|
||||
- `"rerank_id"`: (*Body parameter*), `integer`
|
||||
The ID of the rerank model.
|
||||
- `"keyword"`: (*Body parameter*), `boolean`
|
||||
|
|
@ -2136,6 +2139,9 @@ curl --request POST \
|
|||
The languages that should be translated into, in order to achieve keywords retrievals in different languages.
|
||||
- `"metadata_condition"`: (*Body parameter*), `object`
|
||||
The metadata condition used for filtering chunks:
|
||||
- `"logic"`: (*Body parameter*), `string`
|
||||
- `"and"` Intersection of the result from each condition (default).
|
||||
- `"or"` union of the result from each condition.
|
||||
- `"conditions"`: (*Body parameter*), `array`
|
||||
A list of metadata filter conditions.
|
||||
- `"name"`: `string` - The metadata field name to filter by, e.g., `"author"`, `"company"`, `"url"`. Ensure this parameter before use. See [Set metadata](../guides/dataset/set_metadata.md) for details.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue