ragflow/sdk/python/ragflow_sdk/modules/chunk.py
Mathias Panzenböck 38e7dc6246 added type annotations and __slots__ to the Python SDK
I've also added __slots__, less to improve efficiency than to make sure that
typos in attribute assignments are caught.

A few parts remain untyped because I could not find documentation of their
types. In particular:

- Agent.Dsl
- Agent.create_session()
- DataSet.ParserConfig - I'm not sure if the documented parameters are complete.
- Session.ask() - kwargs specific to agent/chat
2025-11-20 00:39:32 +01:00

112 lines
3.2 KiB
Python

#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from typing import Any, NotRequired, Optional, TYPE_CHECKING, TypedDict
from .base import Base
if TYPE_CHECKING:
    # Needed only for the "RAGFlow" string annotation used in this module;
    # TYPE_CHECKING is False at runtime, so this import is never executed.
    from ..ragflow import RAGFlow

# Names exported by ``from ... import *`` (a one-element tuple).
__all__ = ('Chunk',)
class UpdateMessage(TypedDict):
content: NotRequired[str]
important_keywords: NotRequired[list[str]]
available: NotRequired[bool]
class ChunkUpdateError(Exception):
    """Raised when a chunk update reply carries a non-zero ``code``.

    Attributes:
        code: The ``code`` value taken from the reply, if any.
        message: The ``message`` value taken from the reply, if any; it is
            also passed to ``Exception.__init__`` and therefore is what
            ``str(error)`` shows.
        details: The ``details`` value taken from the reply, if any.
    """

    __slots__ = (
        'code',
        'message',
        'details',
    )

    code: Optional[int]
    message: Optional[str]
    details: Optional[str]

    def __init__(self, code: Optional[int]=None, message: Optional[str]=None, details: Optional[str]=None):
        """Store the three fields and initialise the base exception with ``message``."""
        self.code = code
        self.message = message
        self.details = details
        super().__init__(message)

    def __reduce__(self):
        """Describe how to rebuild this exception for ``pickle`` and ``copy``.

        The inherited implementation rebuilds the object from ``self.args``,
        which is ``(message,)``. That would bind the message to the first
        positional parameter (``code``) and lose ``code`` and ``details``,
        because slot values are not part of the instance ``__dict__``.
        Passing all three fields explicitly keeps the round trip lossless.
        """
        # Third item: any extra instance state (e.g. notes added to the
        # exception); ``None`` tells pickle/copy there is nothing to restore.
        return (type(self), (self.code, self.message, self.details), vars(self) or None)
class Chunk(Base):
    """Client-side object describing one chunk.

    The constructor first assigns a default to every attribute listed in
    ``__slots__`` and then forwards the recognised entries of ``res_dict``
    to ``Base.__init__``.
    NOTE(review): ``Base`` lives in ``.base`` and is not shown here;
    presumably it copies those entries onto the instance - confirm.
    """

    __slots__ = (
        'id',
        'content',
        'important_keywords',
        'questions',
        'create_time',
        'create_timestamp',
        'dataset_id',
        'document_name',
        'document_id',
        'available',
        'similarity',
        'vector_similarity',
        'term_similarity',
        'positions',
        'doc_type',
    )

    id: str
    content: str
    important_keywords: list[str]
    questions: list[str]
    create_time: str
    create_timestamp: float
    dataset_id: Optional[str]
    document_name: str
    document_id: str
    available: bool
    similarity: float
    vector_similarity: float
    term_similarity: float
    positions: list[str]
    doc_type: str

    def __init__(self, rag: "RAGFlow", res_dict: dict[str, Any]) -> None:
        """Assign defaults, then initialise ``Base`` with the known keys.

        Args:
            rag: Passed through unchanged to ``Base.__init__``.
            res_dict: Attribute names mapped to values. Entries whose key
                is not already an attribute of this object are ignored.
                The mapping itself is left unmodified.
        """
        self.id = ""
        self.content = ""
        self.important_keywords = []
        self.questions = []
        self.create_time = ""
        self.create_timestamp = 0.0
        self.dataset_id = None
        self.document_name = ""
        self.document_id = ""
        self.available = True

        # Additional fields for retrieval results
        self.similarity = 0.0
        self.vector_similarity = 0.0
        self.term_similarity = 0.0
        self.positions = []
        self.doc_type = ""

        # Keep only the keys that name an existing attribute. A new dict is
        # built instead of popping from ``res_dict`` so that the caller's
        # mapping is not altered as a side effect of constructing a Chunk.
        known = {key: value for key, value in res_dict.items() if hasattr(self, key)}
        super().__init__(rag, known)

    def update(self, update_message: UpdateMessage) -> None:
        """Submit ``update_message`` for this chunk via ``self.put``.

        The request path is built from ``dataset_id``, ``document_id`` and
        ``id``. This method assigns nothing on the instance itself, so the
        local attributes keep their previous values after the call.
        NOTE(review): ``put`` is not defined in this class; presumably it
        is inherited from ``Base`` and issues an HTTP PUT - confirm.

        Args:
            update_message: The fields to send; see ``UpdateMessage``.

        Raises:
            ChunkUpdateError: If the ``code`` entry of the JSON reply is
                not ``0``; it carries the reply's ``code``, ``message``
                and ``details`` entries.
        """
        path = f"/datasets/{self.dataset_id}/documents/{self.document_id}/chunks/{self.id}"
        reply = self.put(path, update_message).json()
        if reply.get("code") != 0:
            raise ChunkUpdateError(
                code=reply.get("code"),
                message=reply.get("message"),
                details=reply.get("details")
            )