Update docs

This commit is contained in:
Vasilije 2024-03-17 15:36:30 +01:00
parent d96c51aeaf
commit 6eb826a690
20 changed files with 5241 additions and 4790 deletions

File diff suppressed because one or more lines are too long

View file

@ -44,8 +44,7 @@ WORKDIR /app
ENV PYTHONPATH=/app ENV PYTHONPATH=/app
COPY cognee/ /app/cognee COPY cognee/ /app/cognee
COPY main.py /app
COPY api.py /app
COPY entrypoint.sh /app/entrypoint.sh COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh RUN chmod +x /app/entrypoint.sh

View file

@ -202,10 +202,10 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Pipeline file_load_from_filesystem load step completed in 0.34 seconds\n", "Pipeline file_load_from_filesystem load step completed in 0.30 seconds\n",
"1 load package(s) were loaded to destination duckdb and into dataset izmene\n", "1 load package(s) were loaded to destination duckdb and into dataset izmene\n",
"The duckdb destination used duckdb:///:external: location to store data\n", "The duckdb destination used duckdb:///:external: location to store data\n",
"Load package 1710586934.8904011 is LOADED and contains no failed jobs\n" "Load package 1710664582.887609 is LOADED and contains no failed jobs\n"
] ]
} }
], ],
@ -231,7 +231,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"/Users/borisarzentar/Projects/Topoteretes/cognee/cognee/data/cognee/cognee.duckdb\n", "/Users/vasa/Projects/cognee/cognee/data/cognee/cognee.duckdb\n",
"['izmene']\n" "['izmene']\n"
] ]
} }
@ -274,12 +274,22 @@
"text": [ "text": [
"['izmene']\n", "['izmene']\n",
"Processing document (881ecb36-2819-54c3-8147-ed80293084d6)\n", "Processing document (881ecb36-2819-54c3-8147-ed80293084d6)\n",
"Document (881ecb36-2819-54c3-8147-ed80293084d6) categorized: ['Laws, regulations, and legal case documents']\n", "name 'label_content' is not defined\n"
"Document (881ecb36-2819-54c3-8147-ed80293084d6) layer graphs created\n", ]
"Document (881ecb36-2819-54c3-8147-ed80293084d6) layers connected\n", },
"Document (881ecb36-2819-54c3-8147-ed80293084d6) processed\n", {
"Graph is visualized at: https://hub.graphistry.com/graph/graph.html?dataset=037fedd82ee3490aaf52ee8e535cc3fc&type=arrow&viztoken=f05741c9-99ea-4397-a99e-34c57949c17b&usertag=e15d5b35-pygraphistry-0.33.5&splashAfter=1710587061&info=true\n", "ename": "NameError",
"None\n" "evalue": "name 'label_content' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 7\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognee\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m render_graph\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(list_datasets())\n\u001b[0;32m----> 7\u001b[0m graph \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m cognify()\n\u001b[1;32m 9\u001b[0m graph_url \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m render_graph(graph, graph_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnetworkx\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(graph_url)\n",
"File \u001b[0;32m~/Projects/cognee/cognee/api/v1/cognify/cognify.py:53\u001b[0m, in \u001b[0;36mcognify\u001b[0;34m(datasets, graphdatamodel)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m dataset \u001b[38;5;129;01min\u001b[39;00m datasets:\n\u001b[1;32m 51\u001b[0m awaitables\u001b[38;5;241m.\u001b[39mappend(cognify(dataset))\n\u001b[0;32m---> 53\u001b[0m graphs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mawaitables)\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m graphs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 56\u001b[0m files_metadata \u001b[38;5;241m=\u001b[39m db\u001b[38;5;241m.\u001b[39mget_files_metadata(datasets)\n",
"File \u001b[0;32m~/Projects/cognee/cognee/api/v1/cognify/cognify.py:69\u001b[0m, in \u001b[0;36mcognify\u001b[0;34m(datasets, graphdatamodel)\u001b[0m\n\u001b[1;32m 65\u001b[0m text \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mmap\u001b[39m(\u001b[38;5;28;01mlambda\u001b[39;00m element: clean(element\u001b[38;5;241m.\u001b[39mtext), elements))\n\u001b[1;32m 67\u001b[0m awaitables\u001b[38;5;241m.\u001b[39mappend(process_text(text, file_metadata))\n\u001b[0;32m---> 69\u001b[0m graphs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mawaitables)\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m graphs[\u001b[38;5;241m0\u001b[39m]\n",
"File \u001b[0;32m~/Projects/cognee/cognee/api/v1/cognify/cognify.py:112\u001b[0m, in \u001b[0;36mprocess_text\u001b[0;34m(input_text, file_metadata)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28mprint\u001b[39m(e)\n\u001b[0;32m--> 112\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m add_document_node(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDefaultGraphModel:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mUSER_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, file_metadata)\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDocument (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_metadata[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) categorized: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_metadata[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcategories\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m~/Projects/cognee/cognee/api/v1/cognify/cognify.py:104\u001b[0m, in \u001b[0;36mprocess_text\u001b[0;34m(input_text, file_metadata)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 103\u001b[0m \u001b[38;5;66;03m# Classify the content into categories\u001b[39;00m\n\u001b[0;32m--> 104\u001b[0m content_labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[43mlabel_content\u001b[49m(\n\u001b[1;32m 105\u001b[0m input_text,\n\u001b[1;32m 106\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlabel_content.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 107\u001b[0m SummarizedContent\n\u001b[1;32m 108\u001b[0m )\n\u001b[1;32m 109\u001b[0m file_metadata[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msummary\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m content_summary[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msummary\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"\u001b[0;31mNameError\u001b[0m: name 'label_content' is not defined"
] ]
} }
], ],
@ -296,6 +306,88 @@
"print(graph_url)\n" "print(graph_url)\n"
] ]
}, },
{
"cell_type": "code",
"execution_count": 1,
"id": "a0918362-e864-414f-902c-57ce7da6c319",
"metadata": {},
"outputs": [],
"source": [
" from cognee.shared.data_models import GraphDBType\n",
" from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client\n",
" graph_client = get_graph_client(GraphDBType.NETWORKX)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1878572f-fa96-4953-b1d2-f2b0614a7d8f",
"metadata": {},
"outputs": [],
"source": [
"from cognee.utils import render_graph"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "497ab9c0-2db9-4d5c-b140-bd17226712df",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "'MultiDiGraph' object is not callable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m nodes, edges \u001b[38;5;129;01min\u001b[39;00m \u001b[43mgraph_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(nodes)\n",
"\u001b[0;31mTypeError\u001b[0m: 'MultiDiGraph' object is not callable"
]
}
],
"source": [
"\n",
"for nodes, edges in graph_client.graph():\n",
" print(nodes)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "0919cd73-6ff9-40a7-90c6-a97f53d08364",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/vasa/Projects/cognee/.venv/lib/python3.10/site-packages/graphistry/util.py:276: RuntimeWarning: Graph has no edges, may have rendering issues\n",
" warnings.warn(RuntimeWarning(msg))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Graph is visualized at: https://hub.graphistry.com/graph/graph.html?dataset=fde6391bff1a4b00af5cb631a4e2d48e&type=arrow&viztoken=bff2ce55-63ee-4671-926a-8166c32ef44c&usertag=1daaf574-pygraphistry-0.33.5&splashAfter=1710666472&info=true\n",
"None\n"
]
}
],
"source": [
"graph_url = await render_graph(graph_client.graph, graph_type = \"networkx\")\n",
"print(graph_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da6c866c-8150-4b8c-9857-3f0bfe434a97",
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 4,

135
cognee/api/client.py Normal file
View file

@ -0,0 +1,135 @@
""" FastAPI server for the Cognee API. """
import os
import json
from uuid import UUID
import uvicorn
from fastapi import Depends
import logging
# Set up logging
logging.basicConfig(
level=logging.INFO, # Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
format="%(asctime)s [%(levelname)s] %(message)s", # Set the log message format
)
logger = logging.getLogger(__name__)
from cognee.config import Config
config = Config()
config.load()
from typing import Dict, Any, List, Union, BinaryIO
from fastapi import FastAPI, BackgroundTasks, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
app = FastAPI(debug=True)
#
# from auth.cognito.JWTBearer import JWTBearer
# from auth.auth import jwks
#
# auth = JWTBearer(jwks)
@app.get("/")
async def root():
"""
Root endpoint that returns a welcome message.
"""
return {"message": "Hello, World, I am alive!"}
@app.get("/health")
def health_check():
"""
Health check endpoint that returns the server status.
"""
return {"status": "OK"}
class Payload(BaseModel):
    """Generic request body wrapping an arbitrary JSON object."""
    payload: Dict[str, Any]
class AddPayload(BaseModel):
    """Request body for the /add endpoint: data plus its target dataset."""
    # Raw text, an open binary stream, or a list mixing both.
    data: Union[str, BinaryIO, List[Union[str, BinaryIO]]]
    dataset_id: UUID
    dataset_name: str
    class Config:
        arbitrary_types_allowed = True # Required because BinaryIO is not a pydantic-validatable type
class CognifyPayload(BaseModel):
    """Request body for /cognify: one dataset name or a list of them."""
    datasets: Union[str, List[str]]
class SearchPayload(BaseModel):
    """Request body for /search: free-form query parameters."""
    query_params: Dict[str, Any]
@app.post("/add", response_model=dict)
async def add(payload: AddPayload):
""" This endpoint is responsible for adding data to the graph."""
from v1.add.add_standalone import add_standalone
try:
await add_standalone(
payload.data,
payload.dataset_id,
payload.dataset_name,
)
except Exception as error:
return JSONResponse(
status_code = 409,
content = { "error": error }
)
@app.post("/cognify", response_model=dict)
async def cognify(payload: CognifyPayload):
""" This endpoint is responsible for the cognitive processing of the content."""
from v1.cognify.cognify import cognify
try:
await cognify(
payload.datasets,
)
except Exception as error:
return JSONResponse(
status_code = 409,
content = { "error": error }
)
@app.post("/search", response_model=dict)
async def search(payload: SearchPayload):
""" This endpoint is responsible for searching for nodes in the graph."""
from v1.search.search import search
try:
await search(
payload.query_params,
)
except Exception as error:
return JSONResponse(
status_code = 409,
content = { "error": error }
)
def start_api_server(host: str = "0.0.0.0", port: int = 8000):
    """
    Run the FastAPI app under uvicorn.

    Parameters:
    host (str): Interface to bind the server to.
    port (int): TCP port to listen on.
    """
    try:
        logger.info(f"Starting server at {host}:{port}")
        uvicorn.run(app, host=host, port=port)
    except Exception as e:
        # Log the full traceback so startup failures are diagnosable;
        # cleanup/recovery hooks would go here.
        logger.exception(f"Failed to start server: {e}")

if __name__ == "__main__":
    start_api_server()

View file

@ -9,10 +9,14 @@ from unstructured.partition.pdf import partition_pdf
from cognee.infrastructure.databases.vector.qdrant.QDrantAdapter import CollectionConfig from cognee.infrastructure.databases.vector.qdrant.QDrantAdapter import CollectionConfig
from cognee.infrastructure.llm.get_llm_client import get_llm_client from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.graph.add_classification_nodes import add_classification_nodes from cognee.modules.cognify.graph.add_classification_nodes import add_classification_nodes
from cognee.modules.cognify.graph.add_label_nodes import add_label_nodes
from cognee.modules.cognify.graph.add_node_connections import add_node_connection, graph_ready_output, \ from cognee.modules.cognify.graph.add_node_connections import add_node_connection, graph_ready_output, \
connect_nodes_in_graph, extract_node_descriptions connect_nodes_in_graph, extract_node_descriptions
from cognee.modules.cognify.graph.add_propositions import append_to_graph from cognee.modules.cognify.graph.add_propositions import append_to_graph
from cognee.modules.cognify.graph.add_summary_nodes import add_summary_nodes
from cognee.modules.cognify.llm.add_node_connection_embeddings import process_items from cognee.modules.cognify.llm.add_node_connection_embeddings import process_items
from cognee.modules.cognify.llm.label_content import label_content
from cognee.modules.cognify.llm.summarize_content import summarize_content
from cognee.modules.cognify.vector.batch_search import adapted_qdrant_batch_search from cognee.modules.cognify.vector.batch_search import adapted_qdrant_batch_search
from cognee.modules.cognify.vector.add_propositions import add_propositions from cognee.modules.cognify.vector.add_propositions import add_propositions
@ -20,7 +24,8 @@ from cognee.config import Config
from cognee.modules.cognify.llm.classify_content import classify_into_categories from cognee.modules.cognify.llm.classify_content import classify_into_categories
from cognee.modules.cognify.llm.content_to_cog_layers import content_to_cog_layers from cognee.modules.cognify.llm.content_to_cog_layers import content_to_cog_layers
from cognee.modules.cognify.llm.generate_graph import generate_graph from cognee.modules.cognify.llm.generate_graph import generate_graph
from cognee.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer from cognee.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer, \
SummarizedContent, LabeledContent
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.shared.data_models import GraphDBType from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.vector.get_vector_database import get_vector_database from cognee.infrastructure.databases.vector.get_vector_database import get_vector_database
@ -35,7 +40,7 @@ aclient = instructor.patch(OpenAI())
USER_ID = "default_user" USER_ID = "default_user"
async def cognify(datasets: Union[str, List[str]] = None): async def cognify(datasets: Union[str, List[str]] = None, graphdatamodel: object = None):
"""This function is responsible for the cognitive processing of the content.""" """This function is responsible for the cognitive processing of the content."""
db = DuckDBAdapter() db = DuckDBAdapter()
@ -56,7 +61,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
awaitables = [] awaitables = []
await initialize_graph(USER_ID) await initialize_graph(USER_ID,graphdatamodel)
for file_metadata in files_metadata: for file_metadata in files_metadata:
with open(file_metadata["file_path"], "rb") as file: with open(file_metadata["file_path"], "rb") as file:
@ -86,6 +91,30 @@ async def process_text(input_text: str, file_metadata: dict):
print(e) print(e)
raise e raise e
try:
# Classify the content into categories
content_summary = await summarize_content(
input_text,
"summarize_content.txt",
SummarizedContent
)
file_metadata["summary"] = content_summary["summary"]
except Exception as e:
print(e)
raise e
try:
# Classify the content into categories
content_labels = await label_content(
input_text,
"label_content.txt",
LabeledContent
)
file_metadata["content_labels"] = content_labels["content_labels"]
except Exception as e:
print(e)
raise e
await add_document_node(f"DefaultGraphModel:{USER_ID}", file_metadata) await add_document_node(f"DefaultGraphModel:{USER_ID}", file_metadata)
print(f"Document ({file_metadata['id']}) categorized: {file_metadata['categories']}") print(f"Document ({file_metadata['id']}) categorized: {file_metadata['categories']}")
@ -112,10 +141,24 @@ async def process_text(input_text: str, file_metadata: dict):
await add_classification_nodes(f"DOCUMENT:{file_metadata['id']}", classified_categories[0]) await add_classification_nodes(f"DOCUMENT:{file_metadata['id']}", classified_categories[0])
# print(file_metadata['summary'])
await add_summary_nodes(f"DOCUMENT:{file_metadata['id']}", {"summary": file_metadata['summary']})
# print(file_metadata['content_labels'])
await add_label_nodes(f"DOCUMENT:{file_metadata['id']}", {"content_labels": file_metadata['content_labels']})
unique_layer_uuids = await append_to_graph(layer_graphs, classified_categories[0]) unique_layer_uuids = await append_to_graph(layer_graphs, classified_categories[0])
print(f"Document ({file_metadata['id']}) layers connected") print(f"Document ({file_metadata['id']}) layers connected")
print(f"Document categories, summaries and metadata are ",str(classified_categories) )
print(f"Document metadata is ",str(file_metadata) )
graph_client = get_graph_client(GraphDBType.NETWORKX) graph_client = get_graph_client(GraphDBType.NETWORKX)
await graph_client.load_graph_from_file() await graph_client.load_graph_from_file()
@ -169,8 +212,12 @@ async def process_text(input_text: str, file_metadata: dict):
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(cognify("""In the nicest possible way, Britons have always been a bit silly about animals. “Keeping pets, for the English, is not so much a leisure activity as it is an entire way of life,” wrote the anthropologist Kate Fox in Watching the English, nearly 20 years ago. Our dogs, in particular, have been an acceptable outlet for emotions and impulses we otherwise keep strictly controlled our latent desire to be demonstratively affectionate, to be silly and chat to strangers. If this seems like an exaggeration, consider the different reactions youd get if you struck up a conversation with someone in a park with a dog, versus someone on the train.
Indeed, British society has been set up to accommodate these four-legged ambassadors. In the UK unlike Australia, say, or New Zealand dogs are not just permitted on public transport but often openly encouraged. Many pubs and shops display waggish signs, reading, Dogs welcome, people tolerated, and have treat jars on their counters. The other day, as I was waiting outside a cafe with a friends dog, the barista urged me to bring her inside. async def main():
For years, Britons non-partisan passion for animals has been consistent amid dwindling common ground. But lately, rather than bringing out the best in us, our relationship with dogs is increasingly revealing us at our worst and our supposed best friends are paying the price. graph = await cognify(datasets=['izmene'])
As with so many latent traits in the national psyche, it all came unleashed with the pandemic, when many people thought they might as well make the most of all that time at home and in local parks with a dog. Between 2019 and 2022, the number of pet dogs in the UK rose from about nine million to 13 million. But theres long been a seasonal surge around this time of year, substantial enough for the Dogs Trust charity to coin its famous slogan back in 1978: A dog is for life, not just for Christmas. from cognee.utils import render_graph
""")) graph_url = await render_graph(graph, graph_type="networkx")
print(graph_url)
asyncio.run(main())

View file

@ -2,38 +2,61 @@
import asyncio import asyncio
from enum import Enum, auto from enum import Enum, auto
from typing import Dict, Any, Callable, List from typing import Dict, Any, Callable, List
from pydantic import BaseModel, validator
from cognee.modules.search.graph.search_adjacent import search_adjacent from cognee.modules.search.graph.search_adjacent import search_adjacent
from cognee.modules.search.vector.search_similarity import search_similarity from cognee.modules.search.vector.search_similarity import search_similarity
from cognee.modules.search.graph.search_categories import search_categories from cognee.modules.search.graph.search_categories import search_categories
from cognee.modules.search.graph.search_neighbour import search_neighbour from cognee.modules.search.graph.search_neighbour import search_neighbour
from cognee.modules.search.graph.search_summary import search_summary
class SearchType(Enum): class SearchType(Enum):
ADJACENT = auto() ADJACENT = 'ADJACENT'
SIMILARITY = auto() SIMILARITY = 'SIMILARITY'
CATEGORIES = auto() CATEGORIES = 'CATEGORIES'
NEIGHBOR = auto() NEIGHBOR = 'NEIGHBOR'
SUMMARY = 'SUMMARY'
@staticmethod
def from_str(name: str):
try:
return SearchType[name.upper()]
except KeyError:
raise ValueError(f"{name} is not a valid SearchType")
class SearchParameters(BaseModel):
search_type: SearchType
params: Dict[str, Any]
@validator('search_type', pre=True)
def convert_string_to_enum(cls, value):
if isinstance(value, str):
return SearchType.from_str(value)
return value
async def search(graph, query_params: Dict[SearchType, Dict[str, Any]]) -> List: async def perform_search(graph, search_type: str, params: Dict[str, Any]) -> List:
search_params = SearchParameters(search_type=search_type, params=params)
return await search(graph, [search_params])
async def search(graph, query_params: List[SearchParameters]) -> List:
search_functions: Dict[SearchType, Callable] = { search_functions: Dict[SearchType, Callable] = {
SearchType.ADJACENT: search_adjacent, SearchType.ADJACENT: search_adjacent,
SearchType.SIMILARITY: search_similarity, SearchType.SIMILARITY: search_similarity,
SearchType.CATEGORIES: search_categories, SearchType.CATEGORIES: search_categories,
SearchType.NEIGHBOR: search_neighbour, SearchType.NEIGHBOR: search_neighbour,
SearchType.SUMMARY: search_summary
} }
results = [] results = []
# Create a list to hold all the coroutine objects
search_tasks = [] search_tasks = []
for search_type, params in query_params.items(): for search_param in query_params:
search_func = search_functions.get(search_type) search_func = search_functions.get(search_param.search_type)
if search_func: if search_func:
# Schedule the coroutine for execution and store the task # Schedule the coroutine for execution and store the task
full_params = {**params, 'graph': graph} full_params = {**search_param.params, 'graph': graph}
task = search_func(**full_params) task = search_func(**full_params)
search_tasks.append(task) search_tasks.append(task)
@ -41,11 +64,30 @@ async def search(graph, query_params: Dict[SearchType, Dict[str, Any]]) -> List:
search_results = await asyncio.gather(*search_tasks) search_results = await asyncio.gather(*search_tasks)
# Update the results set with the results from all tasks # Update the results set with the results from all tasks
for search_result in search_results: results.extend(search_results)
results.append(search_result)
return results return results
if __name__ == "__main__":
from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
graph_client = get_graph_client(GraphDBType.NETWORKX)
async def main(graph_client):
await graph_client.load_graph_from_file()
graph = graph_client.graph
# Assuming 'graph' is your graph object, obtained from somewhere
search_type = 'ADJACENT'
params = {'query': 'example query', 'other_param': {"node_id": "LLM_LAYER_SUMMARY:DOCUMENT:881ecb36-2819-54c3-8147-ed80293084d6"}}
results = await perform_search(graph, search_type, params)
print(results)
# Run the async main function
asyncio.run(main(graph_client=graph_client))
# if __name__ == "__main__": # if __name__ == "__main__":
# import asyncio # import asyncio

View file

@ -0,0 +1 @@
Create labels that could be used to identify the data in the dataset

View file

@ -0,0 +1 @@
You are a summarization engine and you should summarize content. Be brief and concise

View file

@ -0,0 +1,28 @@
""" Here we update semantic graph with content that classifier produced"""
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType
async def add_label_nodes(document_id, classification_data):
    """Attach LLM-produced content labels for *document_id* to the graph.

    Builds a two-node chain: document -> layer label node -> detail node,
    copying *classification_data* onto both nodes. Always returns True.
    """
    graph_client = get_graph_client(GraphDBType.NETWORKX)
    await graph_client.load_graph_from_file()

    # NOTE(review): the id prefixes and the "summarized_as" relationship look
    # copy-pasted from add_summary_nodes — confirm "LLM_SUMMARY:LABEL:" and
    # "summarized_as" are really intended for label nodes.
    layer_node_id = f"LLM_LAYER_LABEL:{document_id}"
    await graph_client.add_node(layer_node_id, **classification_data)
    await graph_client.add_edge(document_id, layer_node_id, relationship = "summarized_as")

    detail_node_id = f"LLM_SUMMARY:LABEL:{document_id}"
    await graph_client.add_node(detail_node_id, **classification_data)
    await graph_client.add_edge(layer_node_id, detail_node_id, relationship = "contains_label")

    return True

View file

@ -0,0 +1,29 @@
""" Here we update semantic graph with content that classifier produced"""
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType
async def add_summary_nodes(document_id, classification_data):
    """Attach an LLM-produced summary for *document_id* to the graph.

    Builds a two-node chain: document -> layer summary node -> detail node,
    copying *classification_data* onto both nodes. Always returns True.
    """
    graph_client = get_graph_client(GraphDBType.NETWORKX)
    await graph_client.load_graph_from_file()

    layer_node_id = f"LLM_LAYER_SUMMARY:{document_id}"
    await graph_client.add_node(layer_node_id, **classification_data)
    await graph_client.add_edge(document_id, layer_node_id, relationship = "summarized_as")

    detail_node_id = f"LLM_SUMMARY:LAYER:{document_id}"
    await graph_client.add_node(detail_node_id, **classification_data)
    await graph_client.add_edge(layer_node_id, detail_node_id, relationship = "contains_summary")

    return True

View file

@ -2,21 +2,25 @@ from datetime import datetime
from cognee.shared.data_models import DefaultGraphModel, Relationship, UserProperties, UserLocation from cognee.shared.data_models import DefaultGraphModel, Relationship, UserProperties, UserLocation
from cognee.modules.cognify.graph.create import create_semantic_graph from cognee.modules.cognify.graph.create import create_semantic_graph
async def initialize_graph(root_id: str): async def initialize_graph(root_id: str, graphdatamodel):
graph = DefaultGraphModel( if graphdatamodel:
id = root_id, graph = graphdatamodel(id= root_id)
user_properties = UserProperties( await create_semantic_graph(graph)
custom_properties = {"age": "30"}, else:
location = UserLocation( graph = DefaultGraphModel(
location_id = "ny", id=root_id,
description = "New York", user_properties=UserProperties(
default_relationship = Relationship(type = "located_in") custom_properties={"age": "30"},
) location=UserLocation(
), location_id="ny",
default_fields = { description="New York",
"created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), default_relationship=Relationship(type="located_in")
"updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S") )
} ),
) default_fields={
"created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}
)
await create_semantic_graph(graph) await create_semantic_graph(graph)

View file

@ -0,0 +1,14 @@
""" This module contains the code to classify content into categories using the LLM API. """
from typing import Type, List
from pydantic import BaseModel
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.get_llm_client import get_llm_client
async def label_content(text_input: str, system_prompt_file: str, response_model: Type[BaseModel]):
llm_client = get_llm_client()
system_prompt = read_query_prompt(system_prompt_file)
llm_output = await llm_client.acreate_structured_output(text_input, system_prompt, response_model)
return llm_output.dict()

View file

@ -0,0 +1,14 @@
""" This module contains the code to classify content into categories using the LLM API. """
from typing import Type, List
from pydantic import BaseModel
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.get_llm_client import get_llm_client
async def summarize_content(text_input: str, system_prompt_file: str, response_model: Type[BaseModel]):
llm_client = get_llm_client()
system_prompt = read_query_prompt(system_prompt_file)
llm_output = await llm_client.acreate_structured_output(text_input, system_prompt, response_model)
return llm_output.dict()

View file

@ -1,19 +1,24 @@
""" This module contains the function to find the neighbours of a given node in the graph""" """ This module contains the function to find the neighbours of a given node in the graph"""
def search_adjacent(G, node_id:str)->dict: async def search_adjacent(graph,query:str, other_param:dict = None)->dict:
""" Find the neighbours of a given node in the graph """ Find the neighbours of a given node in the graph
:param G: A NetworkX graph object :param graph: A NetworkX graph object
:param node_id: The unique identifier of the node
:return: A dictionary containing the unique identifiers and descriptions of the neighbours of the given node :return: A dictionary containing the unique identifiers and descriptions of the neighbours of the given node
""" """
neighbors = list(G.neighbors(node_id)) node_id = other_param.get('node_id') if other_param else None
if node_id is None or node_id not in graph:
return {}
neighbors = list(graph.neighbors(node_id))
neighbor_descriptions = {} neighbor_descriptions = {}
for neighbor in neighbors: for neighbor in neighbors:
# Access the 'description' attribute for each neighbor # Access the 'description' attribute for each neighbor
# The get method returns None if 'description' attribute does not exist for the node # The get method returns None if 'description' attribute does not exist for the node
neighbor_descriptions[neighbor] = G.nodes[neighbor].get('description') neighbor_descriptions[neighbor] = graph.nodes[neighbor].get('description')
return neighbor_descriptions return neighbor_descriptions

View file

@ -1,15 +1,18 @@
def search_categories(G, category): async def search_categories(graph, query:str, other_param:str = None):
""" """
Filter nodes by category. Filter nodes that contain 'LABEL' in their identifiers and return their summary attributes.
Parameters: Parameters:
- G (nx.Graph): The graph from which to filter nodes. - G (nx.Graph): The graph from which to filter nodes.
- category (str): The category to filter nodes by.
Returns: Returns:
- list: A list of nodes that belong to the specified category. - dict: A dictionary where keys are nodes containing 'SUMMARY' in their identifiers,
and values are their 'summary' attributes.
""" """
return [node for node, data in G.nodes(data=True) if data.get('category') == category] return {node: data.get('content_labels') for node, data in graph.nodes(data=True) if 'LABEL' in node and 'content_labels' in data}

View file

@ -1,10 +1,16 @@
""" Fetches the context of a given node in the graph""" """ Fetches the context of a given node in the graph"""
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
async def search_neighbour(CONNECTED_GRAPH, id): async def search_neighbour(graph, id,other_param:dict = None):
node_id = other_param.get('node_id') if other_param else None
if node_id is None or node_id not in graph:
return {}
relevant_context = [] relevant_context = []
for n,attr in CONNECTED_GRAPH.nodes(data=True): for n,attr in graph.nodes(data=True):
if id in n: if id in n:
for n_, attr_ in CONNECTED_GRAPH.nodes(data=True): for n_, attr_ in graph.nodes(data=True):
relevant_layer = attr['layer_uuid'] relevant_layer = attr['layer_uuid']
if attr_.get('layer_uuid') == relevant_layer: if attr_.get('layer_uuid') == relevant_layer:
@ -14,16 +20,16 @@ async def search_neighbour(CONNECTED_GRAPH, id):
if __name__ == '__main__': # if __name__ == '__main__':
import asyncio # import asyncio
async def main(): # async def main():
from cognee.shared.data_models import GraphDBType # from cognee.shared.data_models import GraphDBType
#
graph_client = get_graph_client(GraphDBType.NETWORKX) # graph_client = get_graph_client(GraphDBType.NETWORKX)
graph = await graph_client.graph # graph = await graph_client.graph
#
await fetch_context(graph, "1") # await fetch_context(graph, "1")
#
asyncio.run(main()) # asyncio.run(main())

View file

@ -0,0 +1,18 @@
import asyncio
async def search_summary(graph, query: str, other_param: str = None):
    """Collect summary attributes from all 'SUMMARY' nodes of *graph*.

    :param graph: A NetworkX-style graph (must support nodes(data=True)).
    :param query: Unused here; kept for the shared search-function signature.
    :param other_param: Unused here; kept for the shared search-function signature.
    :return: Dict mapping each node whose identifier contains 'SUMMARY'
             and which carries a 'summary' attribute, to that attribute.
    """
    summaries = {}
    for node, attributes in graph.nodes(data=True):
        # Only SUMMARY nodes that actually carry a summary attribute qualify.
        if 'SUMMARY' in node and 'summary' in attributes:
            summaries[node] = attributes.get('summary')
    return summaries

View file

@ -3,7 +3,7 @@ from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.graph.add_node_connections import extract_node_descriptions from cognee.modules.cognify.graph.add_node_connections import extract_node_descriptions
from cognee.infrastructure.databases.vector.get_vector_database import get_vector_database from cognee.infrastructure.databases.vector.get_vector_database import get_vector_database
async def search_similarity(query:str ,graph): async def search_similarity(query:str ,graph,other_param:str = None):
node_descriptions = await extract_node_descriptions(graph.nodes(data = True)) node_descriptions = await extract_node_descriptions(graph.nodes(data = True))

View file

@ -161,6 +161,19 @@ class DefaultContentPrediction(BaseModel):
label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent] label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent]
class SummarizedContent(BaseModel):
    """Structured-output schema holding a single summary of a piece of content."""
    # Free-text summary produced by the LLM.
    summary: str
class LabeledContent(BaseModel):
    """Structured-output schema holding classification labels for a piece of content."""
    # Free-text label string produced by the LLM (not a summary, despite the
    # field living next to SummarizedContent).
    content_labels: str
class CognitiveLayerSubgroup(BaseModel): class CognitiveLayerSubgroup(BaseModel):
""" CognitiveLayerSubgroup in a general layer """ """ CognitiveLayerSubgroup in a general layer """
id: int id: int

View file

@ -18,18 +18,18 @@ else
fi fi
echo "Creating database..." echo "Creating database..."
#
PYTHONPATH=. python cognee/setup_database.py #PYTHONPATH=. python cognee/setup_database.py
if [ $? -ne 0 ]; then #if [ $? -ne 0 ]; then
echo "Error: setup_database.py failed" # echo "Error: setup_database.py failed"
exit 1 # exit 1
fi #fi
echo "Starting Gunicorn" echo "Starting Gunicorn"
if [ "$DEBUG" = true ]; then if [ "$DEBUG" = true ]; then
echo "Waiting for the debugger to attach..." echo "Waiting for the debugger to attach..."
python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug cognee.api.client:app
else else
gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug cognee.api.client:app
fi fi