Fixes
This commit is contained in:
parent
49b80ec898
commit
1a213a9f93
9 changed files with 1832 additions and 1183 deletions
File diff suppressed because one or more lines are too long
|
|
@ -178,7 +178,12 @@ async def process_text(input_text: str, file_metadata: dict):
|
|||
|
||||
results = await resolve_cross_graph_references(nodes_by_layer)
|
||||
|
||||
|
||||
relationships = graph_ready_output(results)
|
||||
# print(relationships)
|
||||
await graph_client.load_graph_from_file()
|
||||
|
||||
graph = graph_client.graph
|
||||
|
||||
connect_nodes_in_graph(graph, relationships)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
import asyncio
|
||||
from enum import Enum, auto
|
||||
from typing import Dict, Any, Callable, List
|
||||
from pydantic import BaseModel, validator
|
||||
from pydantic import BaseModel, field_validator
|
||||
from cognee.modules.search.graph.search_adjacent import search_adjacent
|
||||
from cognee.modules.search.vector.search_similarity import search_similarity
|
||||
from cognee.modules.search.graph.search_categories import search_categories
|
||||
|
|
@ -28,7 +28,7 @@ class SearchParameters(BaseModel):
|
|||
search_type: SearchType
|
||||
params: Dict[str, Any]
|
||||
|
||||
@validator('search_type', pre=True)
|
||||
@field_validator('search_type', mode='before')
|
||||
def convert_string_to_enum(cls, value):
|
||||
if isinstance(value, str):
|
||||
return SearchType.from_str(value)
|
||||
|
|
@ -80,8 +80,8 @@ if __name__ == "__main__":
|
|||
await graph_client.load_graph_from_file()
|
||||
graph = graph_client.graph
|
||||
# Assuming 'graph' is your graph object, obtained from somewhere
|
||||
search_type = 'ADJACENT'
|
||||
params = {'query': 'example query', 'other_param': {"node_id": "LLM_LAYER_SUMMARY:DOCUMENT:881ecb36-2819-54c3-8147-ed80293084d6"}}
|
||||
search_type = 'CATEGORIES'
|
||||
params = {'query': 'Ministarstvo', 'other_param': {"node_id": "LLM_LAYER_SUMMARY:DOCUMENT:881ecb36-2819-54c3-8147-ed80293084d6"}}
|
||||
|
||||
results = await search(graph, search_type, params)
|
||||
print(results)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,6 @@ from typing import Any, Dict
|
|||
from pydantic import BaseModel
|
||||
|
||||
class ScoredResult(BaseModel):
|
||||
id: UUID
|
||||
score: int
|
||||
id: str
|
||||
score: float
|
||||
payload: Dict[str, Any]
|
||||
|
|
|
|||
|
|
@ -49,17 +49,19 @@ class WeaviateAdapter(VectorDBInterface):
|
|||
return self.get_collection(collection_name).data.insert_many(objects)
|
||||
|
||||
async def search(self, collection_name: str, query_text: str, limit: int, with_vector: bool = False):
|
||||
search_result = self.get_collection(collection_name).query.bm25(
|
||||
search_result = self.get_collection(collection_name).query.hybrid(
|
||||
query = query_text,
|
||||
limit = limit,
|
||||
include_vector = with_vector,
|
||||
return_metadata = wvc.query.MetadataQuery(score = True),
|
||||
)
|
||||
|
||||
# print(search_result.objects)
|
||||
|
||||
return list(map(lambda result: ScoredResult(
|
||||
id = result.uuid,
|
||||
id = str(result.uuid),
|
||||
payload = result.properties,
|
||||
score = str(result.metadata.score)
|
||||
score = float(result.metadata.score)
|
||||
), search_result.objects))
|
||||
|
||||
async def batch_search(self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False):
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ async def group_nodes_by_layer(node_descriptions):
|
|||
|
||||
return grouped_data
|
||||
|
||||
def connect_nodes_in_graph(graph: Graph, relationship_dict: dict) -> Graph:
|
||||
def connect_nodes_in_graph(graph: Graph, relationship_dict: dict, score_treshold:float=None) -> Graph:
|
||||
"""
|
||||
For each relationship in relationship_dict, check if both nodes exist in the graph based on node attributes.
|
||||
If they do, create a connection (edge) between them.
|
||||
|
|
@ -39,37 +39,43 @@ def connect_nodes_in_graph(graph: Graph, relationship_dict: dict) -> Graph:
|
|||
:param graph: A NetworkX graph object
|
||||
:param relationship_dict: A dictionary containing relationships between nodes
|
||||
"""
|
||||
if score_treshold is None:
|
||||
score_treshold = 0.9
|
||||
for id, relationships in relationship_dict.items():
|
||||
for relationship in relationships:
|
||||
searched_node_attr_id = relationship["searched_node_id"]
|
||||
score_attr_id = relationship["original_id_for_search"]
|
||||
score = relationship["score"]
|
||||
|
||||
# Initialize node keys for both searched_node and score_node
|
||||
searched_node_key, score_node_key = None, None
|
||||
if score> score_treshold:
|
||||
# Initialize node keys for both searched_node and score_node
|
||||
searched_node_key, score_node_key = None, None
|
||||
|
||||
# Find nodes in the graph that match the searched_node_id and score_id from their attributes
|
||||
for node, attrs in graph.nodes(data = True):
|
||||
if "unique_id" in attrs: # Ensure there is an "id" attribute
|
||||
if attrs["unique_id"] == searched_node_attr_id:
|
||||
searched_node_key = node
|
||||
elif attrs["unique_id"] == score_attr_id:
|
||||
score_node_key = node
|
||||
# Find nodes in the graph that match the searched_node_id and score_id from their attributes
|
||||
for node, attrs in graph.nodes(data = True):
|
||||
if "unique_id" in attrs: # Ensure there is an "id" attribute
|
||||
if attrs["unique_id"] == searched_node_attr_id:
|
||||
searched_node_key = node
|
||||
elif attrs["unique_id"] == score_attr_id:
|
||||
score_node_key = node
|
||||
|
||||
# If both nodes are found, no need to continue checking other nodes
|
||||
if searched_node_key and score_node_key:
|
||||
break
|
||||
# If both nodes are found, no need to continue checking other nodes
|
||||
if searched_node_key and score_node_key:
|
||||
break
|
||||
|
||||
# Check if both nodes were found in the graph
|
||||
if searched_node_key is not None and score_node_key is not None:
|
||||
# If both nodes exist, create an edge between them
|
||||
# You can customize the edge attributes as needed, here we use "score" as an attribute
|
||||
graph.add_edge(
|
||||
searched_node_key,
|
||||
score_node_key,
|
||||
weight = score,
|
||||
score_metadata = relationship.get("score_metadata")
|
||||
)
|
||||
# Check if both nodes were found in the graph
|
||||
if searched_node_key is not None and score_node_key is not None:
|
||||
# print(f"Connecting {searched_node_key} to {score_node_key}")
|
||||
# If both nodes exist, create an edge between them
|
||||
# You can customize the edge attributes as needed, here we use "score" as an attribute
|
||||
graph.add_edge(
|
||||
searched_node_key,
|
||||
score_node_key,
|
||||
weight = score,
|
||||
score_metadata = relationship.get("score_metadata")
|
||||
)
|
||||
else:
|
||||
pass
|
||||
|
||||
return graph
|
||||
|
||||
|
|
@ -97,3 +103,32 @@ def graph_ready_output(results):
|
|||
})
|
||||
|
||||
return relationship_dict
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
async def main():
|
||||
graph_client = get_graph_client(GraphDBType.NETWORKX)
|
||||
|
||||
await graph_client.load_graph_from_file()
|
||||
|
||||
|
||||
graph = graph_client.graph
|
||||
|
||||
# for nodes, attr in graph.nodes(data=True):
|
||||
# if 'd0bd0f6a-09e5-4308-89f6-400d66895126' in nodes:
|
||||
# print(nodes)
|
||||
|
||||
|
||||
relationships = {'SuaGeKyKWKWyaSeiqWeWaSyuSKqieSamiyah': [{'collection_id': 'SuaGeKyKWKWyaSeiqWeWaSyuSKqieSamiyah', 'searched_node_id': 'd0bd0f6a-09e5-4308-89f6-400d66895126', 'score': 1.0, 'score_metadata': {'text': 'Pravilnik o izmenama i dopunama Pravilnika o sadržini, načinu i postupku izrade i način vršenja kontrole tehničke dokumentacije prema klasi i nameni objekata'}, 'original_id_for_search': '2801f7b5-55bf-499b-9843-97d48f8e067a'}, {'collection_id': 'SuaGeKyKWKWyaSeiqWeWaSyuSKqieSamiyah', 'searched_node_id': 'd0bd0f6a-09e5-4308-89f6-400d66895126', 'score': 0.1648828387260437, 'score_metadata': {'text': 'Zakon o planiranju i izgradnji'}, 'original_id_for_search': '57966b55-33e2-4eae-a7fa-2f0237643bbe'}, {'collection_id': 'SuaGeKyKWKWyaSeiqWeWaSyuSKqieSamiyah', 'searched_node_id': 'd0bd0f6a-09e5-4308-89f6-400d66895126', 'score': 0.12986786663532257, 'score_metadata': {'text': 'Službeni glasnik RS, broj 77/2015'}, 'original_id_for_search': '0f626d48-4441-43c1-9060-ea7e54f6d8e2'}, {'collection_id': 'SuaGeKyKWKWyaSeiqWeWaSyuSKqieSamiyah', 'searched_node_id': 'c9b9a460-c64a-4e2e-a4d6-aa5b3769274b', 'score': 1.0, 'score_metadata': {'text': 'Službeni glasnik RS, broj 77/2015'}, 'original_id_for_search': '0f626d48-4441-43c1-9060-ea7e54f6d8e2'}, {'collection_id': 'SuaGeKyKWKWyaSeiqWeWaSyuSKqieSamiyah', 'searched_node_id': 'c9b9a460-c64a-4e2e-a4d6-aa5b3769274b', 'score': 0.07603412866592407, 'score_metadata': {'text': 'Prof. dr Zorana Mihajlović'}, 'original_id_for_search': '5d064a62-3cd6-4895-9f60-1a0d8bc299e8'}, {'collection_id': 'SuaGeKyKWKWyaSeiqWeWaSyuSKqieSamiyah', 'searched_node_id': 'c9b9a460-c64a-4e2e-a4d6-aa5b3769274b', 'score': 0.07226034998893738, 'score_metadata': {'text': 'Ministar građevinarstva, saobraćaja i infrastrukture'}, 'original_id_for_search': 'f5d052ca-c4a0-490e-a3ac-d8ad522dea83'}, {'collection_id': 'SuaGeKyKWKWyaSeiqWeWaSyuSKqieSamiyah', 'searched_node_id': 'bbd6d2d6-e673-4b59-a50c-516972a9d0de', 'score': 0.5, 'score_metadata': {'text': 'Pravilnik o izmenama i dopunama Pravilnika o sadržini, načinu i postupku izrade i način vršenja kontrole tehničke dokumentacije prema klasi i nameni objekata'}, 'original_id_for_search': '2801f7b5-55bf-499b-9843-97d48f8e067a'}]}
|
||||
|
||||
connect_nodes_in_graph(graph, relationships)
|
||||
|
||||
from cognitive_architecture.utils import render_graph
|
||||
|
||||
graph_url = await render_graph(graph, graph_type="networkx")
|
||||
|
||||
print(graph_url)
|
||||
|
||||
import asyncio
|
||||
asyncio.run(main())
|
||||
|
|
|
|||
|
|
@ -28,11 +28,18 @@ async def search_similarity(query: str, graph, other_param: str = None):
|
|||
|
||||
for proposition_id in out[0][0]:
|
||||
for n, attr in graph.nodes(data = True):
|
||||
if proposition_id in n:
|
||||
if str(proposition_id) in str(n):
|
||||
for n_, attr_ in graph.nodes(data=True):
|
||||
relevant_layer = attr["layer_uuid"]
|
||||
|
||||
if attr_.get("layer_uuid") == relevant_layer:
|
||||
relevant_context.append(attr_["description"])
|
||||
|
||||
def deduplicate_list(original_list):
|
||||
seen = set()
|
||||
deduplicated_list = [x for x in original_list if not (x in seen or seen.add(x))]
|
||||
return deduplicated_list
|
||||
|
||||
relevant_context = deduplicate_list(relevant_context)
|
||||
|
||||
return relevant_context
|
||||
|
|
|
|||
1048
poetry.lock
generated
1048
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "cognee"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
||||
authors = ["Vasilije Markovic"]
|
||||
readme = "README.md"
|
||||
|
|
@ -18,7 +18,6 @@ classifiers = [
|
|||
|
||||
[tool.poetry.dependencies]
|
||||
python = "~3.10"
|
||||
langchain = "^0.0.338"
|
||||
openai = "1.12.0"
|
||||
python-dotenv = "1.0.1"
|
||||
fastapi = "^0.109.2"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue