fix: notebook errors (#565)
<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Automatically creates a blank graph when a file isn’t found, ensuring smoother operations.
  - Updated demonstration notebooks with dynamic configurations, including refined search operations and input prompts.
  - Introduced optional support for additional graph functionalities via an integrated dependency.
- **Refactor**
  - Streamlined processing by eliminating duplicate steps and simplifying graph rendering workflows.
- **Chores**
  - Updated environment configurations and upgraded the Python runtime for improved performance and consistency.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
parent
811e932cae
commit
45f7c63322
5 changed files with 422 additions and 451 deletions
|
|
@ -243,6 +243,15 @@ class NetworkXAdapter(GraphDBInterface):
|
|||
|
||||
await self.save_graph_to_file(self.filename)
|
||||
|
||||
async def create_empty_graph(self, file_path: str) -> None:
    """Reset the in-memory graph to an empty one and save it to ``file_path``.

    The parent directory of ``file_path`` is created first when the path
    names one; a bare file name (no directory component) is saved as-is.

    Args:
        file_path: Destination path passed on to ``save_graph_to_file``.
    """
    self.graph = nx.MultiDiGraph()

    file_dir = os.path.dirname(file_path)
    # A bare file name yields an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when one is named.
    # exist_ok=True already covers the "directory exists" case.
    if file_dir:
        os.makedirs(file_dir, exist_ok=True)

    await self.save_graph_to_file(file_path)
|
||||
|
||||
async def save_graph_to_file(self, file_path: str = None) -> None:
|
||||
"""Asynchronously save the graph to a file in JSON format."""
|
||||
if not file_path:
|
||||
|
|
@ -322,19 +331,12 @@ class NetworkXAdapter(GraphDBInterface):
|
|||
else:
|
||||
# Log that the file does not exist and an empty graph is initialized
|
||||
logger.warning("File %s not found. Initializing an empty graph.", file_path)
|
||||
self.graph = (
|
||||
nx.MultiDiGraph()
|
||||
) # Use MultiDiGraph to keep it consistent with __init__
|
||||
await self.create_empty_graph(file_path)
|
||||
|
||||
file_dir = os.path.dirname(file_path)
|
||||
if not os.path.exists(file_dir):
|
||||
os.makedirs(file_dir, exist_ok=True)
|
||||
|
||||
await self.save_graph_to_file(file_path)
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.error("Failed to load graph from file: %s", file_path)
|
||||
raise e
|
||||
|
||||
await self.create_empty_graph(file_path)
|
||||
|
||||
async def delete_graph(self, file_path: str = None):
|
||||
"""Asynchronously delete the graph file from the filesystem."""
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@
|
|||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"GRAPHISTRY_USERNAME\"] = input(\"Please enter your graphistry username\")\n",
|
||||
"os.environ[\"GRAPHISTRY_PASSWORD\"] = input(\"Please enter your graphistry password\")"
|
||||
"os.environ[\"GRAPHISTRY_PASSWORD\"] = input(\"Please enter your graphistry password\")\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = input(\"Please enter your OpenAI API key\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -70,19 +71,16 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"from cognee.tasks.repo_processor import (\n",
|
||||
" enrich_dependency_graph,\n",
|
||||
" expand_dependency_graph,\n",
|
||||
" get_repo_file_dependencies,\n",
|
||||
")\n",
|
||||
"from cognee.tasks.storage import add_data_points\n",
|
||||
"from cognee.modules.pipelines.tasks.Task import Task\n",
|
||||
"\n",
|
||||
"detailed_extraction = True\n",
|
||||
"\n",
|
||||
"tasks = [\n",
|
||||
" Task(get_repo_file_dependencies),\n",
|
||||
" Task(add_data_points, task_config={\"batch_size\": 50}),\n",
|
||||
" Task(enrich_dependency_graph, task_config={\"batch_size\": 50}),\n",
|
||||
" Task(expand_dependency_graph, task_config={\"batch_size\": 50}),\n",
|
||||
" Task(add_data_points, task_config={\"batch_size\": 50}),\n",
|
||||
" Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction),\n",
|
||||
" Task(add_data_points, task_config={\"batch_size\": 100 if detailed_extraction else 500}),\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
|
|
@ -95,9 +93,6 @@
|
|||
"from cognee.modules.pipelines import run_tasks\n",
|
||||
"from uuid import uuid5, NAMESPACE_OID\n",
|
||||
"\n",
|
||||
"notebook_path = os.path.abspath(\"\")\n",
|
||||
"repo_clone_location = os.path.join(notebook_path, \".data/graphrag\")\n",
|
||||
"\n",
|
||||
"pipeline = run_tasks(tasks, uuid5(NAMESPACE_OID, repo_clone_location), repo_clone_location, \"code_graph_pipeline\")\n",
|
||||
"\n",
|
||||
"async for result in pipeline:\n",
|
||||
|
|
@ -112,7 +107,7 @@
|
|||
"source": [
|
||||
"from cognee.shared.utils import render_graph\n",
|
||||
"\n",
|
||||
"await render_graph(None, include_nodes=True, include_labels=True)"
|
||||
"await render_graph()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -128,46 +123,11 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from evals.eval_on_hotpot import eval_on_hotpotQA\n",
|
||||
"from evals.eval_on_hotpot import answer_with_cognee\n",
|
||||
"from evals.eval_on_hotpot import answer_without_cognee\n",
|
||||
"from evals.eval_on_hotpot import eval_answers\n",
|
||||
"from cognee.base_config import get_base_config\n",
|
||||
"from pathlib import Path\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"import wget\n",
|
||||
"import json\n",
|
||||
"import statistics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"answer_provider = answer_with_cognee # For native LLM answers use answer_without_cognee\n",
|
||||
"num_samples = 10 # With cognee, it takes ~1m10s per sample\n",
|
||||
"from cognee import search, SearchType\n",
|
||||
"\n",
|
||||
"base_config = get_base_config()\n",
|
||||
"data_root_dir = base_config.data_root_directory\n",
|
||||
"results = await search(query_type=SearchType.CODE, query_text=\"def create_graphrag_config\")\n",
|
||||
"\n",
|
||||
"if not Path(data_root_dir).exists():\n",
|
||||
" Path(data_root_dir).mkdir()\n",
|
||||
"\n",
|
||||
"filepath = data_root_dir / Path(\"hotpot_dev_fullwiki_v1.json\")\n",
|
||||
"if not filepath.exists():\n",
|
||||
" url = \"http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json\"\n",
|
||||
" wget.download(url, out=data_root_dir)\n",
|
||||
"\n",
|
||||
"with open(filepath, \"r\") as file:\n",
|
||||
" dataset = json.load(file)\n",
|
||||
"\n",
|
||||
"instances = dataset if not num_samples else dataset[:num_samples]\n",
|
||||
"answers = []\n",
|
||||
"for instance in tqdm(instances, desc=\"Getting answers\"):\n",
|
||||
" answer = answer_provider(instance)\n",
|
||||
" answers.append(answer)"
|
||||
"print(results)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-15T10:43:57.893763Z",
|
||||
|
|
@ -73,22 +73,15 @@
|
|||
"if \"OPENAI_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"\n",
|
||||
"# Graphiti integration is only tested with neo4j + pgvector + postgres for now\n",
|
||||
"GRAPH_DATABASE_PROVIDER = \"neo4j\"\n",
|
||||
"GRAPH_DATABASE_URL = \"bolt://localhost:7687\"\n",
|
||||
"GRAPH_DATABASE_USERNAME = \"neo4j\"\n",
|
||||
"GRAPH_DATABASE_PASSWORD = \"pleaseletmein\"\n",
|
||||
"GRAPH_DATABASE_URL = \"bolt://localhost:7687\"\n",
|
||||
"\n",
|
||||
"os.environ[\"VECTOR_DB_PROVIDER\"] = \"pgvector\"\n",
|
||||
"\n",
|
||||
"os.environ[\"DB_PROVIDER\"] = \"postgres\"\n",
|
||||
"\n",
|
||||
"os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
|
||||
"\n",
|
||||
"os.environ[\"DB_HOST\"] = \"127.0.0.1\"\n",
|
||||
"os.environ[\"DB_PORT\"] = \"5432\"\n",
|
||||
"os.environ[\"DB_USERNAME\"] = \"cognee\"\n",
|
||||
"os.environ[\"DB_PASSWORD\"] = \"cognee\""
|
||||
"os.environ[\"GRAPH_DATABASE_PROVIDER\"] = GRAPH_DATABASE_PROVIDER\n",
|
||||
"os.environ[\"GRAPH_DATABASE_USERNAME\"] = GRAPH_DATABASE_USERNAME\n",
|
||||
"os.environ[\"GRAPH_DATABASE_PASSWORD\"] = GRAPH_DATABASE_PASSWORD\n",
|
||||
"os.environ[\"GRAPH_DATABASE_URL\"] = GRAPH_DATABASE_URL\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -100,7 +93,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-15T10:43:57.928664Z",
|
||||
|
|
@ -125,36 +118,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-01-15T10:44:25.008501Z",
|
||||
"start_time": "2025-01-15T10:43:57.932240Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Database deleted successfully.\n",
|
||||
"Database deleted successfully.\n",
|
||||
"User d3b51a32-38e1-4fe5-8270-6dc1d6ebfdf0 has registered.\n",
|
||||
"Pipeline file_load_from_filesystem load step completed in 0.10 seconds\n",
|
||||
"1 load package(s) were loaded to destination sqlalchemy and into dataset public\n",
|
||||
"The sqlalchemy destination used postgresql://cognee:***@127.0.0.1:5432/cognee_db location to store data\n",
|
||||
"Load package 1736937839.7739599 is LOADED and contains no failed jobs\n",
|
||||
"Pipeline file_load_from_filesystem load step completed in 0.06 seconds\n",
|
||||
"1 load package(s) were loaded to destination sqlalchemy and into dataset public\n",
|
||||
"The sqlalchemy destination used postgresql://cognee:***@127.0.0.1:5432/cognee_db location to store data\n",
|
||||
"Load package 1736937841.8467042 is LOADED and contains no failed jobs\n",
|
||||
"Graph database initialized.\n",
|
||||
"Added text: Kamala Harris is the Attorney Gener...\n",
|
||||
"Added text: As AG, Harris was in office from Ja...\n",
|
||||
"✅ Result Processed: <graphiti_core.graphiti.Graphiti object at 0x326fe0ce0>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 🔧 Setting Up Logging to Suppress Errors\n",
|
||||
"setup_logging(logging.ERROR) # Keeping logs clean and focused\n",
|
||||
|
|
@ -202,15 +173,7 @@
|
|||
"start_time": "2025-01-15T10:44:25.013325Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"💡 Answer: Kamala Harris was in office as Attorney General of California from January 3, 2011, to January 3, 2017.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Step 1: Formulating the Query 🔍\n",
|
||||
"query = \"When was Kamala Harris in office?\"\n",
|
||||
|
|
@ -260,7 +223,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
|||
728
poetry.lock
generated
728
poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -86,6 +86,7 @@ plotly = {version = "^6.0.0", optional = true}
|
|||
gdown = {version = "^5.2.0", optional = true}
|
||||
pyside6 = {version = "^6.8.2.1", optional = true}
|
||||
qasync = {version = "^0.27.1", optional = true}
|
||||
graphiti-core = {version = "^0.7.0", optional = true}
|
||||
|
||||
|
||||
[tool.poetry.extras]
|
||||
|
|
@ -109,6 +110,7 @@ docs = ["unstructured"]
|
|||
codegraph = ["fastembed", "tree-sitter", "tree-sitter-python"]
|
||||
evals = ["plotly", "gdown"]
|
||||
gui = ["pyside6", "qasync"]
|
||||
graphiti = ["graphiti-core"]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^7.4.0"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue