Update docs
This commit is contained in:
parent
d96c51aeaf
commit
6eb826a690
20 changed files with 5241 additions and 4790 deletions
9416
Demo_graph.ipynb
9416
Demo_graph.ipynb
File diff suppressed because one or more lines are too long
|
|
@ -44,8 +44,7 @@ WORKDIR /app
|
|||
ENV PYTHONPATH=/app
|
||||
|
||||
COPY cognee/ /app/cognee
|
||||
COPY main.py /app
|
||||
COPY api.py /app
|
||||
|
||||
|
||||
COPY entrypoint.sh /app/entrypoint.sh
|
||||
RUN chmod +x /app/entrypoint.sh
|
||||
|
|
|
|||
|
|
@ -202,10 +202,10 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Pipeline file_load_from_filesystem load step completed in 0.34 seconds\n",
|
||||
"Pipeline file_load_from_filesystem load step completed in 0.30 seconds\n",
|
||||
"1 load package(s) were loaded to destination duckdb and into dataset izmene\n",
|
||||
"The duckdb destination used duckdb:///:external: location to store data\n",
|
||||
"Load package 1710586934.8904011 is LOADED and contains no failed jobs\n"
|
||||
"Load package 1710664582.887609 is LOADED and contains no failed jobs\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -231,7 +231,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/borisarzentar/Projects/Topoteretes/cognee/cognee/data/cognee/cognee.duckdb\n",
|
||||
"/Users/vasa/Projects/cognee/cognee/data/cognee/cognee.duckdb\n",
|
||||
"['izmene']\n"
|
||||
]
|
||||
}
|
||||
|
|
@ -274,12 +274,22 @@
|
|||
"text": [
|
||||
"['izmene']\n",
|
||||
"Processing document (881ecb36-2819-54c3-8147-ed80293084d6)\n",
|
||||
"Document (881ecb36-2819-54c3-8147-ed80293084d6) categorized: ['Laws, regulations, and legal case documents']\n",
|
||||
"Document (881ecb36-2819-54c3-8147-ed80293084d6) layer graphs created\n",
|
||||
"Document (881ecb36-2819-54c3-8147-ed80293084d6) layers connected\n",
|
||||
"Document (881ecb36-2819-54c3-8147-ed80293084d6) processed\n",
|
||||
"Graph is visualized at: https://hub.graphistry.com/graph/graph.html?dataset=037fedd82ee3490aaf52ee8e535cc3fc&type=arrow&viztoken=f05741c9-99ea-4397-a99e-34c57949c17b&usertag=e15d5b35-pygraphistry-0.33.5&splashAfter=1710587061&info=true\n",
|
||||
"None\n"
|
||||
"name 'label_content' is not defined\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "NameError",
|
||||
"evalue": "name 'label_content' is not defined",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[3], line 7\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognee\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m render_graph\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(list_datasets())\n\u001b[0;32m----> 7\u001b[0m graph \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m cognify()\n\u001b[1;32m 9\u001b[0m graph_url \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m render_graph(graph, graph_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnetworkx\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(graph_url)\n",
|
||||
"File \u001b[0;32m~/Projects/cognee/cognee/api/v1/cognify/cognify.py:53\u001b[0m, in \u001b[0;36mcognify\u001b[0;34m(datasets, graphdatamodel)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m dataset \u001b[38;5;129;01min\u001b[39;00m datasets:\n\u001b[1;32m 51\u001b[0m awaitables\u001b[38;5;241m.\u001b[39mappend(cognify(dataset))\n\u001b[0;32m---> 53\u001b[0m graphs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mawaitables)\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m graphs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 56\u001b[0m files_metadata \u001b[38;5;241m=\u001b[39m db\u001b[38;5;241m.\u001b[39mget_files_metadata(datasets)\n",
|
||||
"File \u001b[0;32m~/Projects/cognee/cognee/api/v1/cognify/cognify.py:69\u001b[0m, in \u001b[0;36mcognify\u001b[0;34m(datasets, graphdatamodel)\u001b[0m\n\u001b[1;32m 65\u001b[0m text \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mmap\u001b[39m(\u001b[38;5;28;01mlambda\u001b[39;00m element: clean(element\u001b[38;5;241m.\u001b[39mtext), elements))\n\u001b[1;32m 67\u001b[0m awaitables\u001b[38;5;241m.\u001b[39mappend(process_text(text, file_metadata))\n\u001b[0;32m---> 69\u001b[0m graphs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mawaitables)\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m graphs[\u001b[38;5;241m0\u001b[39m]\n",
|
||||
"File \u001b[0;32m~/Projects/cognee/cognee/api/v1/cognify/cognify.py:112\u001b[0m, in \u001b[0;36mprocess_text\u001b[0;34m(input_text, file_metadata)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28mprint\u001b[39m(e)\n\u001b[0;32m--> 112\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m add_document_node(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDefaultGraphModel:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mUSER_ID\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, file_metadata)\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDocument (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_metadata[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) categorized: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_metadata[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcategories\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"File \u001b[0;32m~/Projects/cognee/cognee/api/v1/cognify/cognify.py:104\u001b[0m, in \u001b[0;36mprocess_text\u001b[0;34m(input_text, file_metadata)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 103\u001b[0m \u001b[38;5;66;03m# Classify the content into categories\u001b[39;00m\n\u001b[0;32m--> 104\u001b[0m content_labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[43mlabel_content\u001b[49m(\n\u001b[1;32m 105\u001b[0m input_text,\n\u001b[1;32m 106\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlabel_content.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 107\u001b[0m SummarizedContent\n\u001b[1;32m 108\u001b[0m )\n\u001b[1;32m 109\u001b[0m file_metadata[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msummary\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m content_summary[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msummary\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
||||
"\u001b[0;31mNameError\u001b[0m: name 'label_content' is not defined"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
@ -296,6 +306,88 @@
|
|||
"print(graph_url)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "a0918362-e864-414f-902c-57ce7da6c319",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
" from cognee.shared.data_models import GraphDBType\n",
|
||||
" from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client\n",
|
||||
" graph_client = get_graph_client(GraphDBType.NETWORKX)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "1878572f-fa96-4953-b1d2-f2b0614a7d8f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cognee.utils import render_graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "497ab9c0-2db9-4d5c-b140-bd17226712df",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "'MultiDiGraph' object is not callable",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m nodes, edges \u001b[38;5;129;01min\u001b[39;00m \u001b[43mgraph_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(nodes)\n",
|
||||
"\u001b[0;31mTypeError\u001b[0m: 'MultiDiGraph' object is not callable"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"for nodes, edges in graph_client.graph():\n",
|
||||
" print(nodes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "0919cd73-6ff9-40a7-90c6-a97f53d08364",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/vasa/Projects/cognee/.venv/lib/python3.10/site-packages/graphistry/util.py:276: RuntimeWarning: Graph has no edges, may have rendering issues\n",
|
||||
" warnings.warn(RuntimeWarning(msg))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Graph is visualized at: https://hub.graphistry.com/graph/graph.html?dataset=fde6391bff1a4b00af5cb631a4e2d48e&type=arrow&viztoken=bff2ce55-63ee-4671-926a-8166c32ef44c&usertag=1daaf574-pygraphistry-0.33.5&splashAfter=1710666472&info=true\n",
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"graph_url = await render_graph(graph_client.graph, graph_type = \"networkx\")\n",
|
||||
"print(graph_url)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "da6c866c-8150-4b8c-9857-3f0bfe434a97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
|
|
|
|||
135
cognee/api/client.py
Normal file
135
cognee/api/client.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
""" FastAPI server for the Cognee API. """
|
||||
|
||||
import os
|
||||
import json
|
||||
from uuid import UUID
|
||||
|
||||
import uvicorn
|
||||
from fastapi import Depends
|
||||
|
||||
import logging
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, # Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
|
||||
format="%(asctime)s [%(levelname)s] %(message)s", # Set the log message format
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from cognee.config import Config
|
||||
|
||||
config = Config()
|
||||
config.load()
|
||||
|
||||
from typing import Dict, Any, List, Union, BinaryIO
|
||||
from fastapi import FastAPI, BackgroundTasks, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
app = FastAPI(debug=True)
|
||||
#
|
||||
# from auth.cognito.JWTBearer import JWTBearer
|
||||
# from auth.auth import jwks
|
||||
#
|
||||
# auth = JWTBearer(jwks)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""
|
||||
Root endpoint that returns a welcome message.
|
||||
"""
|
||||
return {"message": "Hello, World, I am alive!"}
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
def health_check():
|
||||
"""
|
||||
Health check endpoint that returns the server status.
|
||||
"""
|
||||
return {"status": "OK"}
|
||||
|
||||
|
||||
class Payload(BaseModel):
|
||||
payload: Dict[str, Any]
|
||||
|
||||
|
||||
class AddPayload(BaseModel):
|
||||
data: Union[str, BinaryIO, List[Union[str, BinaryIO]]]
|
||||
dataset_id: UUID
|
||||
dataset_name: str
|
||||
class Config:
|
||||
arbitrary_types_allowed = True # This is required to allow the use of Union
|
||||
class CognifyPayload(BaseModel):
|
||||
datasets: Union[str, List[str]]
|
||||
|
||||
class SearchPayload(BaseModel):
|
||||
query_params: Dict[str, Any]
|
||||
@app.post("/add", response_model=dict)
|
||||
async def add(payload: AddPayload):
|
||||
""" This endpoint is responsible for adding data to the graph."""
|
||||
from v1.add.add_standalone import add_standalone
|
||||
|
||||
try:
|
||||
await add_standalone(
|
||||
payload.data,
|
||||
payload.dataset_id,
|
||||
payload.dataset_name,
|
||||
)
|
||||
except Exception as error:
|
||||
return JSONResponse(
|
||||
status_code = 409,
|
||||
content = { "error": error }
|
||||
)
|
||||
|
||||
@app.post("/cognify", response_model=dict)
|
||||
async def cognify(payload: CognifyPayload):
|
||||
""" This endpoint is responsible for the cognitive processing of the content."""
|
||||
from v1.cognify.cognify import cognify
|
||||
|
||||
try:
|
||||
await cognify(
|
||||
payload.datasets,
|
||||
)
|
||||
except Exception as error:
|
||||
return JSONResponse(
|
||||
status_code = 409,
|
||||
content = { "error": error }
|
||||
)
|
||||
|
||||
|
||||
@app.post("/search", response_model=dict)
|
||||
async def search(payload: SearchPayload):
|
||||
""" This endpoint is responsible for searching for nodes in the graph."""
|
||||
from v1.search.search import search
|
||||
|
||||
try:
|
||||
await search(
|
||||
payload.query_params,
|
||||
)
|
||||
except Exception as error:
|
||||
return JSONResponse(
|
||||
status_code = 409,
|
||||
content = { "error": error }
|
||||
)
|
||||
|
||||
|
||||
|
||||
def start_api_server(host: str = "0.0.0.0", port: int = 8000):
|
||||
"""
|
||||
Start the API server using uvicorn.
|
||||
Parameters:
|
||||
host (str): The host for the server.
|
||||
port (int): The port for the server.
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Starting server at {host}:{port}")
|
||||
uvicorn.run(app, host=host, port=port)
|
||||
except Exception as e:
|
||||
logger.exception(f"Failed to start server: {e}")
|
||||
# Here you could add any cleanup code or error recovery code.
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
start_api_server()
|
||||
|
|
@ -9,10 +9,14 @@ from unstructured.partition.pdf import partition_pdf
|
|||
from cognee.infrastructure.databases.vector.qdrant.QDrantAdapter import CollectionConfig
|
||||
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
||||
from cognee.modules.cognify.graph.add_classification_nodes import add_classification_nodes
|
||||
from cognee.modules.cognify.graph.add_label_nodes import add_label_nodes
|
||||
from cognee.modules.cognify.graph.add_node_connections import add_node_connection, graph_ready_output, \
|
||||
connect_nodes_in_graph, extract_node_descriptions
|
||||
from cognee.modules.cognify.graph.add_propositions import append_to_graph
|
||||
from cognee.modules.cognify.graph.add_summary_nodes import add_summary_nodes
|
||||
from cognee.modules.cognify.llm.add_node_connection_embeddings import process_items
|
||||
from cognee.modules.cognify.llm.label_content import label_content
|
||||
from cognee.modules.cognify.llm.summarize_content import summarize_content
|
||||
from cognee.modules.cognify.vector.batch_search import adapted_qdrant_batch_search
|
||||
from cognee.modules.cognify.vector.add_propositions import add_propositions
|
||||
|
||||
|
|
@ -20,7 +24,8 @@ from cognee.config import Config
|
|||
from cognee.modules.cognify.llm.classify_content import classify_into_categories
|
||||
from cognee.modules.cognify.llm.content_to_cog_layers import content_to_cog_layers
|
||||
from cognee.modules.cognify.llm.generate_graph import generate_graph
|
||||
from cognee.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer
|
||||
from cognee.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer, \
|
||||
SummarizedContent, LabeledContent
|
||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||
from cognee.shared.data_models import GraphDBType
|
||||
from cognee.infrastructure.databases.vector.get_vector_database import get_vector_database
|
||||
|
|
@ -35,7 +40,7 @@ aclient = instructor.patch(OpenAI())
|
|||
|
||||
USER_ID = "default_user"
|
||||
|
||||
async def cognify(datasets: Union[str, List[str]] = None):
|
||||
async def cognify(datasets: Union[str, List[str]] = None, graphdatamodel: object = None):
|
||||
"""This function is responsible for the cognitive processing of the content."""
|
||||
|
||||
db = DuckDBAdapter()
|
||||
|
|
@ -56,7 +61,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
|
|||
|
||||
awaitables = []
|
||||
|
||||
await initialize_graph(USER_ID)
|
||||
await initialize_graph(USER_ID,graphdatamodel)
|
||||
|
||||
for file_metadata in files_metadata:
|
||||
with open(file_metadata["file_path"], "rb") as file:
|
||||
|
|
@ -86,6 +91,30 @@ async def process_text(input_text: str, file_metadata: dict):
|
|||
print(e)
|
||||
raise e
|
||||
|
||||
try:
|
||||
# Classify the content into categories
|
||||
content_summary = await summarize_content(
|
||||
input_text,
|
||||
"summarize_content.txt",
|
||||
SummarizedContent
|
||||
)
|
||||
file_metadata["summary"] = content_summary["summary"]
|
||||
except Exception as e:
|
||||
print(e)
|
||||
raise e
|
||||
|
||||
try:
|
||||
# Classify the content into categories
|
||||
content_labels = await label_content(
|
||||
input_text,
|
||||
"label_content.txt",
|
||||
LabeledContent
|
||||
)
|
||||
file_metadata["content_labels"] = content_labels["content_labels"]
|
||||
except Exception as e:
|
||||
print(e)
|
||||
raise e
|
||||
|
||||
await add_document_node(f"DefaultGraphModel:{USER_ID}", file_metadata)
|
||||
print(f"Document ({file_metadata['id']}) categorized: {file_metadata['categories']}")
|
||||
|
||||
|
|
@ -112,10 +141,24 @@ async def process_text(input_text: str, file_metadata: dict):
|
|||
|
||||
await add_classification_nodes(f"DOCUMENT:{file_metadata['id']}", classified_categories[0])
|
||||
|
||||
# print(file_metadata['summary'])
|
||||
|
||||
await add_summary_nodes(f"DOCUMENT:{file_metadata['id']}", {"summary": file_metadata['summary']})
|
||||
|
||||
# print(file_metadata['content_labels'])
|
||||
|
||||
await add_label_nodes(f"DOCUMENT:{file_metadata['id']}", {"content_labels": file_metadata['content_labels']})
|
||||
|
||||
unique_layer_uuids = await append_to_graph(layer_graphs, classified_categories[0])
|
||||
|
||||
print(f"Document ({file_metadata['id']}) layers connected")
|
||||
|
||||
|
||||
|
||||
print(f"Document categories, summaries and metadata are ",str(classified_categories) )
|
||||
|
||||
print(f"Document metadata is ",str(file_metadata) )
|
||||
|
||||
graph_client = get_graph_client(GraphDBType.NETWORKX)
|
||||
|
||||
await graph_client.load_graph_from_file()
|
||||
|
|
@ -169,8 +212,12 @@ async def process_text(input_text: str, file_metadata: dict):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(cognify("""In the nicest possible way, Britons have always been a bit silly about animals. “Keeping pets, for the English, is not so much a leisure activity as it is an entire way of life,” wrote the anthropologist Kate Fox in Watching the English, nearly 20 years ago. Our dogs, in particular, have been an acceptable outlet for emotions and impulses we otherwise keep strictly controlled – our latent desire to be demonstratively affectionate, to be silly and chat to strangers. If this seems like an exaggeration, consider the different reactions you’d get if you struck up a conversation with someone in a park with a dog, versus someone on the train.
|
||||
Indeed, British society has been set up to accommodate these four-legged ambassadors. In the UK – unlike Australia, say, or New Zealand – dogs are not just permitted on public transport but often openly encouraged. Many pubs and shops display waggish signs, reading, “Dogs welcome, people tolerated”, and have treat jars on their counters. The other day, as I was waiting outside a cafe with a friend’s dog, the barista urged me to bring her inside.
|
||||
For years, Britons’ non-partisan passion for animals has been consistent amid dwindling common ground. But lately, rather than bringing out the best in us, our relationship with dogs is increasingly revealing us at our worst – and our supposed “best friends” are paying the price.
|
||||
As with so many latent traits in the national psyche, it all came unleashed with the pandemic, when many people thought they might as well make the most of all that time at home and in local parks with a dog. Between 2019 and 2022, the number of pet dogs in the UK rose from about nine million to 13 million. But there’s long been a seasonal surge around this time of year, substantial enough for the Dogs Trust charity to coin its famous slogan back in 1978: “A dog is for life, not just for Christmas.”
|
||||
"""))
|
||||
|
||||
async def main():
|
||||
graph = await cognify(datasets=['izmene'])
|
||||
from cognee.utils import render_graph
|
||||
graph_url = await render_graph(graph, graph_type="networkx")
|
||||
print(graph_url)
|
||||
|
||||
|
||||
asyncio.run(main())
|
||||
|
|
@ -2,38 +2,61 @@
|
|||
import asyncio
|
||||
from enum import Enum, auto
|
||||
from typing import Dict, Any, Callable, List
|
||||
|
||||
from pydantic import BaseModel, validator
|
||||
from cognee.modules.search.graph.search_adjacent import search_adjacent
|
||||
from cognee.modules.search.vector.search_similarity import search_similarity
|
||||
from cognee.modules.search.graph.search_categories import search_categories
|
||||
from cognee.modules.search.graph.search_neighbour import search_neighbour
|
||||
from cognee.modules.search.graph.search_summary import search_summary
|
||||
|
||||
|
||||
class SearchType(Enum):
|
||||
ADJACENT = auto()
|
||||
SIMILARITY = auto()
|
||||
CATEGORIES = auto()
|
||||
NEIGHBOR = auto()
|
||||
ADJACENT = 'ADJACENT'
|
||||
SIMILARITY = 'SIMILARITY'
|
||||
CATEGORIES = 'CATEGORIES'
|
||||
NEIGHBOR = 'NEIGHBOR'
|
||||
SUMMARY = 'SUMMARY'
|
||||
|
||||
@staticmethod
|
||||
def from_str(name: str):
|
||||
try:
|
||||
return SearchType[name.upper()]
|
||||
except KeyError:
|
||||
raise ValueError(f"{name} is not a valid SearchType")
|
||||
|
||||
class SearchParameters(BaseModel):
|
||||
search_type: SearchType
|
||||
params: Dict[str, Any]
|
||||
|
||||
@validator('search_type', pre=True)
|
||||
def convert_string_to_enum(cls, value):
|
||||
if isinstance(value, str):
|
||||
return SearchType.from_str(value)
|
||||
return value
|
||||
|
||||
|
||||
async def search(graph, query_params: Dict[SearchType, Dict[str, Any]]) -> List:
|
||||
async def perform_search(graph, search_type: str, params: Dict[str, Any]) -> List:
|
||||
search_params = SearchParameters(search_type=search_type, params=params)
|
||||
return await search(graph, [search_params])
|
||||
|
||||
|
||||
async def search(graph, query_params: List[SearchParameters]) -> List:
|
||||
search_functions: Dict[SearchType, Callable] = {
|
||||
SearchType.ADJACENT: search_adjacent,
|
||||
SearchType.SIMILARITY: search_similarity,
|
||||
SearchType.CATEGORIES: search_categories,
|
||||
SearchType.NEIGHBOR: search_neighbour,
|
||||
SearchType.SUMMARY: search_summary
|
||||
}
|
||||
|
||||
results = []
|
||||
|
||||
# Create a list to hold all the coroutine objects
|
||||
search_tasks = []
|
||||
|
||||
for search_type, params in query_params.items():
|
||||
search_func = search_functions.get(search_type)
|
||||
for search_param in query_params:
|
||||
search_func = search_functions.get(search_param.search_type)
|
||||
if search_func:
|
||||
# Schedule the coroutine for execution and store the task
|
||||
full_params = {**params, 'graph': graph}
|
||||
full_params = {**search_param.params, 'graph': graph}
|
||||
task = search_func(**full_params)
|
||||
search_tasks.append(task)
|
||||
|
||||
|
|
@ -41,11 +64,30 @@ async def search(graph, query_params: Dict[SearchType, Dict[str, Any]]) -> List:
|
|||
search_results = await asyncio.gather(*search_tasks)
|
||||
|
||||
# Update the results set with the results from all tasks
|
||||
for search_result in search_results:
|
||||
results.append(search_result)
|
||||
results.extend(search_results)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from cognee.shared.data_models import GraphDBType
|
||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||
graph_client = get_graph_client(GraphDBType.NETWORKX)
|
||||
|
||||
|
||||
async def main(graph_client):
|
||||
await graph_client.load_graph_from_file()
|
||||
graph = graph_client.graph
|
||||
# Assuming 'graph' is your graph object, obtained from somewhere
|
||||
search_type = 'ADJACENT'
|
||||
params = {'query': 'example query', 'other_param': {"node_id": "LLM_LAYER_SUMMARY:DOCUMENT:881ecb36-2819-54c3-8147-ed80293084d6"}}
|
||||
|
||||
results = await perform_search(graph, search_type, params)
|
||||
print(results)
|
||||
|
||||
# Run the async main function
|
||||
asyncio.run(main(graph_client=graph_client))
|
||||
# if __name__ == "__main__":
|
||||
# import asyncio
|
||||
|
||||
|
|
|
|||
1
cognee/infrastructure/llm/prompts/label_content.txt
Normal file
1
cognee/infrastructure/llm/prompts/label_content.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
Create labels that could be used to identify the data in the dataset
|
||||
1
cognee/infrastructure/llm/prompts/summarize_content.txt
Normal file
1
cognee/infrastructure/llm/prompts/summarize_content.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
You are a summarization engine and you should sumamarize content. Be brief and concise
|
||||
28
cognee/modules/cognify/graph/add_label_nodes.py
Normal file
28
cognee/modules/cognify/graph/add_label_nodes.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
""" Here we update semantic graph with content that classifier produced"""
|
||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType
|
||||
|
||||
|
||||
async def add_label_nodes(document_id, classification_data):
|
||||
graph_client = get_graph_client(GraphDBType.NETWORKX)
|
||||
|
||||
await graph_client.load_graph_from_file()
|
||||
|
||||
# Create the layer classification node ID
|
||||
layer_classification_node_id = f"LLM_LAYER_LABEL:{document_id}"
|
||||
|
||||
# Add the node to the graph, unpacking the node data from the dictionary
|
||||
await graph_client.add_node(layer_classification_node_id, **classification_data)
|
||||
|
||||
# Link this node to the corresponding document node
|
||||
await graph_client.add_edge(document_id, layer_classification_node_id, relationship = "summarized_as")
|
||||
|
||||
# Create the detailed classification node ID
|
||||
detailed_classification_node_id = f"LLM_SUMMARY:LABEL:{document_id}"
|
||||
|
||||
# Add the detailed classification node, reusing the same node data
|
||||
await graph_client.add_node(detailed_classification_node_id, **classification_data)
|
||||
|
||||
# Link the detailed classification node to the layer classification node
|
||||
await graph_client.add_edge(layer_classification_node_id, detailed_classification_node_id, relationship = "contains_label")
|
||||
|
||||
return True
|
||||
29
cognee/modules/cognify/graph/add_summary_nodes.py
Normal file
29
cognee/modules/cognify/graph/add_summary_nodes.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
""" Here we update semantic graph with content that classifier produced"""
|
||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType
|
||||
|
||||
|
||||
async def add_summary_nodes(document_id, classification_data):
|
||||
graph_client = get_graph_client(GraphDBType.NETWORKX)
|
||||
|
||||
await graph_client.load_graph_from_file()
|
||||
|
||||
|
||||
# Create the layer classification node ID
|
||||
layer_classification_node_id = f"LLM_LAYER_SUMMARY:{document_id}"
|
||||
|
||||
# Add the node to the graph, unpacking the node data from the dictionary
|
||||
await graph_client.add_node(layer_classification_node_id, **classification_data)
|
||||
|
||||
# Link this node to the corresponding document node
|
||||
await graph_client.add_edge(document_id, layer_classification_node_id, relationship = "summarized_as")
|
||||
|
||||
# Create the detailed classification node ID
|
||||
detailed_classification_node_id = f"LLM_SUMMARY:LAYER:{document_id}"
|
||||
|
||||
# Add the detailed classification node, reusing the same node data
|
||||
await graph_client.add_node(detailed_classification_node_id, **classification_data)
|
||||
|
||||
# Link the detailed classification node to the layer classification node
|
||||
await graph_client.add_edge(layer_classification_node_id, detailed_classification_node_id, relationship = "contains_summary")
|
||||
|
||||
return True
|
||||
|
|
@ -2,21 +2,25 @@ from datetime import datetime
|
|||
from cognee.shared.data_models import DefaultGraphModel, Relationship, UserProperties, UserLocation
|
||||
from cognee.modules.cognify.graph.create import create_semantic_graph
|
||||
|
||||
async def initialize_graph(root_id: str):
|
||||
graph = DefaultGraphModel(
|
||||
id = root_id,
|
||||
user_properties = UserProperties(
|
||||
custom_properties = {"age": "30"},
|
||||
location = UserLocation(
|
||||
location_id = "ny",
|
||||
description = "New York",
|
||||
default_relationship = Relationship(type = "located_in")
|
||||
)
|
||||
),
|
||||
default_fields = {
|
||||
"created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
}
|
||||
)
|
||||
async def initialize_graph(root_id: str, graphdatamodel):
|
||||
if graphdatamodel:
|
||||
graph = graphdatamodel(id= root_id)
|
||||
await create_semantic_graph(graph)
|
||||
else:
|
||||
graph = DefaultGraphModel(
|
||||
id=root_id,
|
||||
user_properties=UserProperties(
|
||||
custom_properties={"age": "30"},
|
||||
location=UserLocation(
|
||||
location_id="ny",
|
||||
description="New York",
|
||||
default_relationship=Relationship(type="located_in")
|
||||
)
|
||||
),
|
||||
default_fields={
|
||||
"created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
}
|
||||
)
|
||||
|
||||
await create_semantic_graph(graph)
|
||||
await create_semantic_graph(graph)
|
||||
|
|
|
|||
14
cognee/modules/cognify/llm/label_content.py
Normal file
14
cognee/modules/cognify/llm/label_content.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
""" This module contains the code to classify content into categories using the LLM API. """
|
||||
from typing import Type, List
|
||||
from pydantic import BaseModel
|
||||
from cognee.infrastructure.llm.prompts import read_query_prompt
|
||||
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
||||
|
||||
async def label_content(text_input: str, system_prompt_file: str, response_model: Type[BaseModel]):
|
||||
llm_client = get_llm_client()
|
||||
|
||||
system_prompt = read_query_prompt(system_prompt_file)
|
||||
|
||||
llm_output = await llm_client.acreate_structured_output(text_input, system_prompt, response_model)
|
||||
|
||||
return llm_output.dict()
|
||||
14
cognee/modules/cognify/llm/summarize_content.py
Normal file
14
cognee/modules/cognify/llm/summarize_content.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
""" This module contains the code to classify content into categories using the LLM API. """
|
||||
from typing import Type, List
|
||||
from pydantic import BaseModel
|
||||
from cognee.infrastructure.llm.prompts import read_query_prompt
|
||||
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
||||
|
||||
async def summarize_content(text_input: str, system_prompt_file: str, response_model: Type[BaseModel]):
    """Summarize *text_input* with the LLM, constrained to *response_model*.

    :param text_input: Raw text to be summarized.
    :param system_prompt_file: Name of the prompt file passed to ``read_query_prompt``.
    :param response_model: Pydantic model describing the expected structured output.
    :return: The structured LLM response converted to a plain ``dict``.
    """
    # Load the system prompt first, then hand both prompt and schema to the client.
    prompt_text = read_query_prompt(system_prompt_file)
    client = get_llm_client()

    structured_response = await client.acreate_structured_output(
        text_input,
        prompt_text,
        response_model,
    )

    return structured_response.dict()
|
||||
|
|
@ -1,19 +1,24 @@
|
|||
""" This module contains the function to find the neighbours of a given node in the graph"""
|
||||
|
||||
|
||||
async def search_adjacent(graph, query:str, other_param:dict = None)->dict:
    """Return the descriptions of the neighbours of one node.

    :param graph: A NetworkX graph object.
    :param query: Unused here; kept so all search functions share one signature.
    :param other_param: Optional dict; its ``'node_id'`` entry selects the node.
    :return: Mapping of neighbour id -> its ``'description'`` attribute
             (``None`` when a neighbour has no description). Empty dict when
             no node id is supplied or the node is not in the graph.
    """
    target = other_param.get('node_id') if other_param else None

    # Guard: nothing to look up, or the id does not exist in this graph.
    if target is None or target not in graph:
        return {}

    # .get() yields None for neighbours lacking a 'description' attribute.
    return {
        neighbour: graph.nodes[neighbour].get('description')
        for neighbour in graph.neighbors(target)
    }
|
||||
|
|
@ -1,15 +1,18 @@
|
|||
|
||||
|
||||
|
||||
async def search_categories(graph, query:str, other_param:str = None):
    """
    Collect content labels from label nodes.

    Scans every node and keeps those whose identifier contains 'LABEL' and
    that carry a 'content_labels' attribute.

    Parameters:
    - graph: The graph whose nodes are scanned.
    - query (str): Unused; present so all search functions share one signature.
    - other_param (str): Unused; present for the same reason.

    Returns:
    - dict: Keys are matching node identifiers, values are their
      'content_labels' attributes.
    """
    labels = {}
    for node_id, attributes in graph.nodes(data=True):
        if 'LABEL' in node_id and 'content_labels' in attributes:
            labels[node_id] = attributes.get('content_labels')
    return labels
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,16 @@
|
|||
""" Fetches the context of a given node in the graph"""
|
||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||
async def search_neighbour(CONNECTED_GRAPH, id):
|
||||
async def search_neighbour(graph, id,other_param:dict = None):
|
||||
|
||||
node_id = other_param.get('node_id') if other_param else None
|
||||
|
||||
if node_id is None or node_id not in graph:
|
||||
return {}
|
||||
|
||||
relevant_context = []
|
||||
for n,attr in CONNECTED_GRAPH.nodes(data=True):
|
||||
for n,attr in graph.nodes(data=True):
|
||||
if id in n:
|
||||
for n_, attr_ in CONNECTED_GRAPH.nodes(data=True):
|
||||
for n_, attr_ in graph.nodes(data=True):
|
||||
relevant_layer = attr['layer_uuid']
|
||||
|
||||
if attr_.get('layer_uuid') == relevant_layer:
|
||||
|
|
@ -14,16 +20,16 @@ async def search_neighbour(CONNECTED_GRAPH, id):
|
|||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import asyncio
|
||||
async def main():
|
||||
from cognee.shared.data_models import GraphDBType
|
||||
|
||||
graph_client = get_graph_client(GraphDBType.NETWORKX)
|
||||
graph = await graph_client.graph
|
||||
|
||||
await fetch_context(graph, "1")
|
||||
|
||||
asyncio.run(main())
|
||||
# if __name__ == '__main__':
|
||||
# import asyncio
|
||||
# async def main():
|
||||
# from cognee.shared.data_models import GraphDBType
|
||||
#
|
||||
# graph_client = get_graph_client(GraphDBType.NETWORKX)
|
||||
# graph = await graph_client.graph
|
||||
#
|
||||
# await fetch_context(graph, "1")
|
||||
#
|
||||
# asyncio.run(main())
|
||||
|
||||
|
||||
|
|
|
|||
18
cognee/modules/search/graph/search_summary.py
Normal file
18
cognee/modules/search/graph/search_summary.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
|
||||
|
||||
|
||||
async def search_summary(graph, query:str, other_param:str = None):
    """
    Collect summaries from summary nodes.

    Scans every node and keeps those whose identifier contains 'SUMMARY' and
    that carry a 'summary' attribute.

    Parameters:
    - graph: The graph whose nodes are scanned.
    - query (str): Unused; present so all search functions share one signature.
    - other_param (str): Unused; present for the same reason.

    Returns:
    - dict: Keys are matching node identifiers, values are their 'summary'
      attributes.
    """
    summaries = {}
    for node_id, attributes in graph.nodes(data=True):
        if 'SUMMARY' in node_id and 'summary' in attributes:
            summaries[node_id] = attributes.get('summary')
    return summaries
|
|
@ -3,7 +3,7 @@ from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|||
from cognee.modules.cognify.graph.add_node_connections import extract_node_descriptions
|
||||
from cognee.infrastructure.databases.vector.get_vector_database import get_vector_database
|
||||
|
||||
async def search_similarity(query:str ,graph):
|
||||
async def search_similarity(query:str ,graph,other_param:str = None):
|
||||
|
||||
node_descriptions = await extract_node_descriptions(graph.nodes(data = True))
|
||||
|
||||
|
|
|
|||
|
|
@ -161,6 +161,19 @@ class DefaultContentPrediction(BaseModel):
|
|||
label: Union[TextContent, AudioContent, ImageContent, VideoContent, MultimediaContent, Model3DContent, ProceduralContent]
|
||||
|
||||
|
||||
|
||||
class SummarizedContent(BaseModel):
    """Structured LLM output holding a single content summary."""

    # Free-text summary produced by the model.
    summary: str
|
||||
|
||||
class LabeledContent(BaseModel):
    """Structured LLM output holding content labels (not a summary)."""

    # Labels assigned to the content by the model, as a single string —
    # presumably delimiter-separated; confirm against the labeling prompt.
    content_labels: str
|
||||
|
||||
|
||||
|
||||
class CognitiveLayerSubgroup(BaseModel):
|
||||
""" CognitiveLayerSubgroup in a general layer """
|
||||
id: int
|
||||
|
|
|
|||
|
|
@ -18,18 +18,18 @@ else
|
|||
fi
|
||||
|
||||
echo "Creating database..."
|
||||
|
||||
PYTHONPATH=. python cognee/setup_database.py
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Error: setup_database.py failed"
|
||||
exit 1
|
||||
fi
|
||||
#
|
||||
#PYTHONPATH=. python cognee/setup_database.py
|
||||
#if [ $? -ne 0 ]; then
|
||||
# echo "Error: setup_database.py failed"
|
||||
# exit 1
|
||||
#fi
|
||||
|
||||
echo "Starting Gunicorn"
|
||||
|
||||
# Launch the API (cognee.api.client:app) under gunicorn with uvicorn workers.
# DEBUG=true pauses startup until a debugpy client attaches on port 5678, so
# IDE breakpoints are hit from the very first request.
if [ "$DEBUG" = true ]; then
    echo "Waiting for the debugger to attach..."
    # NOTE(review): -t 30000 is an unusually long worker timeout — presumably
    # to survive long debugger pauses; confirm it is intended outside debug too.
    python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug cognee.api.client:app
else
    gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug cognee.api.client:app
fi
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue