feat: remove `ids` parameter from vector storage query methods across multiple implementations
This commit is contained in:
parent
60564cf453
commit
874ddda605
9 changed files with 29 additions and 84 deletions
|
|
@ -219,7 +219,7 @@ class BaseVectorStorage(StorageNameSpace, ABC):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def query(
|
async def query(
|
||||||
self, query: str, top_k: int, ids: list[str] | None = None
|
self, query: str, top_k: int
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
"""Query the vector storage and retrieve top_k results."""
|
"""Query the vector storage and retrieve top_k results."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -165,7 +165,7 @@ class ChromaVectorDBStorage(BaseVectorStorage):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def query(
|
async def query(
|
||||||
self, query: str, top_k: int, ids: list[str] | None = None
|
self, query: str, top_k: int
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
try:
|
try:
|
||||||
embedding = await self.embedding_func(
|
embedding = await self.embedding_func(
|
||||||
|
|
|
||||||
|
|
@ -180,7 +180,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
||||||
return [m["__id__"] for m in list_data]
|
return [m["__id__"] for m in list_data]
|
||||||
|
|
||||||
async def query(
|
async def query(
|
||||||
self, query: str, top_k: int, ids: list[str] | None = None
|
self, query: str, top_k: int
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Search by a textual query; returns top_k results with their metadata + similarity distance.
|
Search by a textual query; returns top_k results with their metadata + similarity distance.
|
||||||
|
|
|
||||||
|
|
@ -810,7 +810,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
async def query(
|
async def query(
|
||||||
self, query: str, top_k: int, ids: list[str] | None = None
|
self, query: str, top_k: int
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
# Ensure collection is loaded before querying
|
# Ensure collection is loaded before querying
|
||||||
self._ensure_collection_loaded()
|
self._ensure_collection_loaded()
|
||||||
|
|
|
||||||
|
|
@ -1771,7 +1771,7 @@ class MongoVectorDBStorage(BaseVectorStorage):
|
||||||
return list_data
|
return list_data
|
||||||
|
|
||||||
async def query(
|
async def query(
|
||||||
self, query: str, top_k: int, ids: list[str] | None = None
|
self, query: str, top_k: int
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
"""Queries the vector database using Atlas Vector Search."""
|
"""Queries the vector database using Atlas Vector Search."""
|
||||||
# Generate the embedding
|
# Generate the embedding
|
||||||
|
|
|
||||||
|
|
@ -137,7 +137,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
||||||
)
|
)
|
||||||
|
|
||||||
async def query(
|
async def query(
|
||||||
self, query: str, top_k: int, ids: list[str] | None = None
|
self, query: str, top_k: int
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
# Execute embedding outside of lock to improve concurrency
|
# Execute embedding outside of lock to improve concurrency
|
||||||
embedding = await self.embedding_func(
|
embedding = await self.embedding_func(
|
||||||
|
|
|
||||||
|
|
@ -2005,7 +2005,7 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
|
|
||||||
#################### query method ###############
|
#################### query method ###############
|
||||||
async def query(
|
async def query(
|
||||||
self, query: str, top_k: int, ids: list[str] | None = None
|
self, query: str, top_k: int
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
embeddings = await self.embedding_func(
|
embeddings = await self.embedding_func(
|
||||||
[query], _priority=5
|
[query], _priority=5
|
||||||
|
|
@ -2016,7 +2016,6 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
sql = SQL_TEMPLATES[self.namespace].format(embedding_string=embedding_string)
|
sql = SQL_TEMPLATES[self.namespace].format(embedding_string=embedding_string)
|
||||||
params = {
|
params = {
|
||||||
"workspace": self.workspace,
|
"workspace": self.workspace,
|
||||||
"doc_ids": ids,
|
|
||||||
"closer_than_threshold": 1 - self.cosine_better_than_threshold,
|
"closer_than_threshold": 1 - self.cosine_better_than_threshold,
|
||||||
"top_k": top_k,
|
"top_k": top_k,
|
||||||
}
|
}
|
||||||
|
|
@ -4578,85 +4577,31 @@ SQL_TEMPLATES = {
|
||||||
update_time = EXCLUDED.update_time
|
update_time = EXCLUDED.update_time
|
||||||
""",
|
""",
|
||||||
"relationships": """
|
"relationships": """
|
||||||
WITH relevant_chunks AS (SELECT id as chunk_id
|
SELECT r.source_id as src_id, r.target_id as tgt_id,
|
||||||
FROM LIGHTRAG_VDB_CHUNKS
|
EXTRACT(EPOCH FROM r.create_time)::BIGINT as created_at
|
||||||
WHERE $2
|
FROM LIGHTRAG_VDB_RELATION r
|
||||||
:: varchar [] IS NULL OR full_doc_id = ANY ($2:: varchar [])
|
WHERE r.workspace = $1
|
||||||
)
|
AND r.content_vector <=> '[{embedding_string}]'::vector < $2
|
||||||
, rc AS (
|
ORDER BY r.content_vector <=> '[{embedding_string}]'::vector
|
||||||
SELECT array_agg(chunk_id) AS chunk_arr
|
LIMIT $3
|
||||||
FROM relevant_chunks
|
|
||||||
), cand AS (
|
|
||||||
SELECT
|
|
||||||
r.id, r.source_id AS src_id, r.target_id AS tgt_id, r.chunk_ids, r.create_time, r.content_vector <=> '[{embedding_string}]'::vector AS dist
|
|
||||||
FROM LIGHTRAG_VDB_RELATION r
|
|
||||||
WHERE r.workspace = $1
|
|
||||||
ORDER BY r.content_vector <=> '[{embedding_string}]'::vector
|
|
||||||
LIMIT ($4 * 50)
|
|
||||||
)
|
|
||||||
SELECT c.src_id,
|
|
||||||
c.tgt_id,
|
|
||||||
EXTRACT(EPOCH FROM c.create_time) ::BIGINT AS created_at
|
|
||||||
FROM cand c
|
|
||||||
JOIN rc ON TRUE
|
|
||||||
WHERE c.dist < $3
|
|
||||||
AND c.chunk_ids && (rc.chunk_arr::varchar[])
|
|
||||||
ORDER BY c.dist, c.id
|
|
||||||
LIMIT $4;
|
|
||||||
""",
|
""",
|
||||||
"entities": """
|
"entities": """
|
||||||
WITH relevant_chunks AS (SELECT id as chunk_id
|
SELECT e.entity_name,
|
||||||
FROM LIGHTRAG_VDB_CHUNKS
|
EXTRACT(EPOCH FROM e.create_time)::BIGINT as created_at
|
||||||
WHERE $2
|
|
||||||
:: varchar [] IS NULL OR full_doc_id = ANY ($2:: varchar [])
|
|
||||||
)
|
|
||||||
, rc AS (
|
|
||||||
SELECT array_agg(chunk_id) AS chunk_arr
|
|
||||||
FROM relevant_chunks
|
|
||||||
), cand AS (
|
|
||||||
SELECT
|
|
||||||
e.id, e.entity_name, e.chunk_ids, e.create_time, e.content_vector <=> '[{embedding_string}]'::vector AS dist
|
|
||||||
FROM LIGHTRAG_VDB_ENTITY e
|
FROM LIGHTRAG_VDB_ENTITY e
|
||||||
WHERE e.workspace = $1
|
WHERE e.workspace = $1
|
||||||
|
AND e.content_vector <=> '[{embedding_string}]'::vector < $2
|
||||||
ORDER BY e.content_vector <=> '[{embedding_string}]'::vector
|
ORDER BY e.content_vector <=> '[{embedding_string}]'::vector
|
||||||
LIMIT ($4 * 50)
|
LIMIT $3
|
||||||
)
|
|
||||||
SELECT c.entity_name,
|
|
||||||
EXTRACT(EPOCH FROM c.create_time) ::BIGINT AS created_at
|
|
||||||
FROM cand c
|
|
||||||
JOIN rc ON TRUE
|
|
||||||
WHERE c.dist < $3
|
|
||||||
AND c.chunk_ids && (rc.chunk_arr::varchar[])
|
|
||||||
ORDER BY c.dist, c.id
|
|
||||||
LIMIT $4;
|
|
||||||
""",
|
""",
|
||||||
"chunks": """
|
"chunks": """
|
||||||
WITH relevant_chunks AS (SELECT id as chunk_id
|
SELECT id, content, file_path,
|
||||||
FROM LIGHTRAG_VDB_CHUNKS
|
EXTRACT(EPOCH FROM create_time)::BIGINT as created_at
|
||||||
WHERE $2
|
FROM LIGHTRAG_VDB_CHUNKS
|
||||||
:: varchar [] IS NULL OR full_doc_id = ANY ($2:: varchar [])
|
WHERE workspace = $1
|
||||||
)
|
AND content_vector <=> '[{embedding_string}]'::vector < $2
|
||||||
, rc AS (
|
ORDER BY content_vector <=> '[{embedding_string}]'::vector
|
||||||
SELECT array_agg(chunk_id) AS chunk_arr
|
LIMIT $3
|
||||||
FROM relevant_chunks
|
|
||||||
), cand AS (
|
|
||||||
SELECT
|
|
||||||
id, content, file_path, create_time, content_vector <=> '[{embedding_string}]'::vector AS dist
|
|
||||||
FROM LIGHTRAG_VDB_CHUNKS
|
|
||||||
WHERE workspace = $1
|
|
||||||
ORDER BY content_vector <=> '[{embedding_string}]'::vector
|
|
||||||
LIMIT ($4 * 50)
|
|
||||||
)
|
|
||||||
SELECT c.id,
|
|
||||||
c.content,
|
|
||||||
c.file_path,
|
|
||||||
EXTRACT(EPOCH FROM c.create_time) ::BIGINT AS created_at
|
|
||||||
FROM cand c
|
|
||||||
JOIN rc ON TRUE
|
|
||||||
WHERE c.dist < $3
|
|
||||||
AND c.id = ANY (rc.chunk_arr)
|
|
||||||
ORDER BY c.dist, c.id
|
|
||||||
LIMIT $4;
|
|
||||||
""",
|
""",
|
||||||
# DROP tables
|
# DROP tables
|
||||||
"drop_specifiy_table_workspace": """
|
"drop_specifiy_table_workspace": """
|
||||||
|
|
|
||||||
|
|
@ -200,7 +200,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
async def query(
|
async def query(
|
||||||
self, query: str, top_k: int, ids: list[str] | None = None
|
self, query: str, top_k: int
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
embedding = await self.embedding_func(
|
embedding = await self.embedding_func(
|
||||||
[query], _priority=5
|
[query], _priority=5
|
||||||
|
|
|
||||||
|
|
@ -2055,7 +2055,7 @@ async def _get_vector_context(
|
||||||
# Use chunk_top_k if specified, otherwise fall back to top_k
|
# Use chunk_top_k if specified, otherwise fall back to top_k
|
||||||
search_top_k = query_param.chunk_top_k or query_param.top_k
|
search_top_k = query_param.chunk_top_k or query_param.top_k
|
||||||
|
|
||||||
results = await chunks_vdb.query(query, top_k=search_top_k, ids=query_param.ids)
|
results = await chunks_vdb.query(query, top_k=search_top_k)
|
||||||
if not results:
|
if not results:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
@ -2599,7 +2599,7 @@ async def _get_node_data(
|
||||||
)
|
)
|
||||||
|
|
||||||
results = await entities_vdb.query(
|
results = await entities_vdb.query(
|
||||||
query, top_k=query_param.top_k, ids=query_param.ids
|
query, top_k=query_param.top_k
|
||||||
)
|
)
|
||||||
|
|
||||||
if not len(results):
|
if not len(results):
|
||||||
|
|
@ -2875,7 +2875,7 @@ async def _get_edge_data(
|
||||||
)
|
)
|
||||||
|
|
||||||
results = await relationships_vdb.query(
|
results = await relationships_vdb.query(
|
||||||
keywords, top_k=query_param.top_k, ids=query_param.ids
|
keywords, top_k=query_param.top_k
|
||||||
)
|
)
|
||||||
|
|
||||||
if not len(results):
|
if not len(results):
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue