fix: Fix Jupyter Notebook (#142)
* fix: resolve issue with dlt sqlalchemy usage. Cognee's database configuration was not handled properly by dlt; a new dlt handler module was added to propagate the database configuration.
* fix: resolve issue with the Jupyter notebook. The notebook called cognee's add function the old way; it is updated to match the latest add function, which no longer returns output.
* fix: remove the empty DB_PATH argument from .env.template. An empty DB_PATH value in the .env file overrides the default path cognee intends to use.
This commit is contained in:
parent 2e70c23de8
commit fcd60861ba

6 changed files with 77 additions and 123 deletions
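Both ingestion entry points touched below now obtain their dlt destination from a single helper instead of building SQLAlchemy credentials inline. A minimal sketch of the new call pattern; pipeline_name appears verbatim in the hunks below, while the destination keyword is dlt's standard pipeline parameter and the rest of the original call is truncated in this view:

    import dlt
    from cognee.tasks.ingestion import get_dlt_destination

    # The helper translates cognee's relational config into a dlt sqlalchemy destination.
    destination = get_dlt_destination()

    # Sketch of the pipeline wiring; arguments beyond pipeline_name are assumed,
    # not taken from the commit.
    pipeline = dlt.pipeline(
        pipeline_name = "file_load_from_filesystem",
        destination = destination,
    )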
.env.template
@@ -18,8 +18,14 @@ VECTOR_ENGINE_PROVIDER="qdrant" # or "weaviate" or "lancedb"
 VECTOR_DB_URL=
 VECTOR_DB_KEY=

+# Database provider
+DB_PROVIDER="sqlite" # or "postgres"
+
+# Database name
 DB_NAME=cognee_db

+# Postgres specific parameters (Only if Postgres is run)
+DB_HOST=127.0.0.1
+DB_PORT=5432
+DB_USERNAME=cognee
+DB_PASSWORD=cognee
+DB_DATABASE=cognee_db
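The DB_PATH removal matters because most .env loaders treat a present-but-empty variable as an explicit value rather than as unset, so the empty string wins over cognee's built-in default path. A minimal sketch of the failure mode; the default path value here is illustrative, not cognee's actual constant:

    import os

    DEFAULT_DB_PATH = ".cognee_system/databases"  # illustrative default

    # `DB_PATH=` in a .env file loads as an empty string, not as "unset".
    os.environ["DB_PATH"] = ""
    db_path = os.environ.get("DB_PATH", DEFAULT_DB_PATH)
    print(repr(db_path))  # '' -- the default is silently overridden

    # Dropping the line from .env restores the intended default:
    del os.environ["DB_PATH"]
    db_path = os.environ.get("DB_PATH", DEFAULT_DB_PATH)
    print(repr(db_path))  # '.cognee_system/databases'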
@@ -8,8 +8,9 @@ from cognee.infrastructure.files.storage import LocalStorage
 from cognee.modules.ingestion import get_matched_datasets, save_data_to_file
 from cognee.shared.utils import send_telemetry
 from cognee.base_config import get_base_config
-from cognee.infrastructure.databases.relational import get_relational_config, get_relational_engine, create_db_and_tables
+from cognee.infrastructure.databases.relational import get_relational_engine, create_db_and_tables
 from cognee.modules.users.methods import get_default_user
+from cognee.tasks.ingestion import get_dlt_destination
 from cognee.modules.users.permissions.methods import give_permission_on_document
 from cognee.modules.users.models import User
 from cognee.modules.data.methods import create_dataset

@@ -78,18 +79,7 @@ async def add_files(file_paths: List[str], dataset_name: str, user: User = None)
         else:
             processed_file_paths.append(file_path)

-    relational_config = get_relational_config()
-
-    destination = dlt.destinations.sqlalchemy(
-        credentials = {
-            "host": relational_config.db_host,
-            "port": relational_config.db_port,
-            "username": relational_config.db_username,
-            "password": relational_config.db_password,
-            "database": relational_config.db_name,
-            "drivername": relational_config.db_provider,
-        },
-    )
+    destination = get_dlt_destination()

     pipeline = dlt.pipeline(
         pipeline_name = "file_load_from_filesystem",
@@ -38,6 +38,8 @@ class UUID(TypeDecorator):
         if value is None:
             return value
         if dialect.name == 'postgresql':
+            if isinstance(value, uuid.UUID):
+                return value
             return uuid.UUID(value)
         else:
             return uuid.UUID(bytes = value)
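The two added lines guard the postgres branch of a SQLAlchemy TypeDecorator against values that already arrive as uuid.UUID instances, which is what the postgres driver returns for native UUID columns. A self-contained sketch of the pattern, assuming the hunk sits in process_result_value; the surrounding class shape (impl, cache_ok) is reconstructed, only the branch bodies come from the diff:

    import uuid
    from sqlalchemy.types import TypeDecorator, CHAR

    class UUID(TypeDecorator):
        # Platform-independent UUID column; impl and cache_ok are assumed here.
        impl = CHAR
        cache_ok = True

        def process_result_value(self, value, dialect):
            if value is None:
                return value
            if dialect.name == 'postgresql':
                # Added guard: the postgres driver may already hand back uuid.UUID objects.
                if isinstance(value, uuid.UUID):
                    return value
                return uuid.UUID(value)
            else:
                # Other dialects store the raw 16 bytes.
                return uuid.UUID(bytes = value)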
cognee/tasks/ingestion/get_dlt_destination.py (new file, 41 lines)
@@ -0,0 +1,41 @@
+import os
+import dlt
+from typing import Union
+
+from cognee.infrastructure.databases.relational import get_relational_config
+
+def get_dlt_destination() -> Union[type[dlt.destinations.sqlalchemy], None]:
+    """
+    Handles propagation of the cognee database configuration to the dlt library.
+
+    Returns:
+        sqlalchemy: sqlalchemy destination used by the dlt library
+
+    """
+    relational_config = get_relational_config()
+
+    if relational_config.db_provider == "sqlite":
+        # When sqlite is the database provider, hostname, port, username and password should not be forwarded.
+        # The database is located by joining the path location and the database name.
+        destination = dlt.destinations.sqlalchemy(
+            credentials = {
+                "database": os.path.join(relational_config.db_path, relational_config.db_name),
+                "drivername": relational_config.db_provider,
+            },
+        )
+    elif relational_config.db_provider == "postgres":
+        # The dlt library doesn't accept postgres as the drivername, it only accepts postgresql.
+        destination = dlt.destinations.sqlalchemy(
+            credentials = {
+                "host": relational_config.db_host,
+                "port": relational_config.db_port,
+                "username": relational_config.db_username,
+                "password": relational_config.db_password,
+                "database": relational_config.db_name,
+                "drivername": "postgresql",
+            },
+        )
+    else:
+        destination = None
+
+    return destination
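The credentials dict mirrors SQLAlchemy's URL components one-to-one, which makes the two branches easy to sanity-check. A small sketch of the connection URLs the two configurations correspond to, built with SQLAlchemy's URL helper and the example values from .env.template (the sqlite path is illustrative):

    from sqlalchemy.engine import URL

    # sqlite: only drivername and database (a filesystem path) are meaningful.
    sqlite_url = URL.create(
        drivername = "sqlite",
        database = ".cognee_system/databases/cognee_db",  # assumed db_path + db_name
    )
    print(sqlite_url)  # sqlite:///.cognee_system/databases/cognee_db

    # postgres: full network credentials, with drivername spelled "postgresql".
    postgres_url = URL.create(
        drivername = "postgresql",
        username = "cognee",
        password = "cognee",
        host = "127.0.0.1",
        port = 5432,
        database = "cognee_db",
    )
    print(postgres_url)  # str() masks the password: postgresql://cognee:***@127.0.0.1:5432/cognee_db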
@@ -6,20 +6,10 @@ from cognee.modules.users.models import User
 from cognee.infrastructure.databases.relational import get_relational_config, get_relational_engine
 from cognee.modules.data.methods import create_dataset
 from cognee.modules.users.permissions.methods import give_permission_on_document
+from .get_dlt_destination import get_dlt_destination

 async def ingest_data(file_paths: list[str], dataset_name: str, user: User):
-    relational_config = get_relational_config()
-
-    destination = dlt.destinations.sqlalchemy(
-        credentials = {
-            "host": relational_config.db_host,
-            "port": relational_config.db_port,
-            "username": relational_config.db_username,
-            "password": relational_config.db_password,
-            "database": relational_config.db_name,
-            "drivername": relational_config.db_provider,
-        },
-    )
+    destination = get_dlt_destination()

     pipeline = dlt.pipeline(
         pipeline_name = "file_load_from_filesystem",
@@ -1,44 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "958375a6ffc0c2e4",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2024-09-20T14:02:47.336283Z",
-     "start_time": "2024-09-20T14:02:43.652444Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "import asyncio\n",
-    "import logging\n",
-    "from typing import Union\n",
-    "\n",
-    "from cognee.modules.cognify.config import get_cognify_config\n",
-    "from cognee.shared.data_models import KnowledgeGraph\n",
-    "from cognee.modules.data.models import Dataset, Data\n",
-    "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
-    "from cognee.modules.data.methods import get_datasets, get_datasets_by_name\n",
-    "from cognee.modules.pipelines.tasks.Task import Task\n",
-    "from cognee.modules.pipelines import run_tasks, run_tasks_parallel\n",
-    "from cognee.modules.users.models import User\n",
-    "from cognee.modules.users.methods import get_default_user\n",
-    "from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status\n",
-    "from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status\n",
-    "from cognee.tasks import chunk_extract_summary, \\\n",
-    "    chunk_naive_llm_classifier, \\\n",
-    "    chunk_remove_disconnected, \\\n",
-    "    infer_data_ontology, \\\n",
-    "    save_chunks_to_store, \\\n",
-    "    chunk_update_check, \\\n",
-    "    chunks_into_graph, \\\n",
-    "    source_documents_to_chunks, \\\n",
-    "    check_permissions_on_documents, \\\n",
-    "    classify_documents"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": 2,
@@ -308,64 +269,8 @@
   "outputs": [],
   "source": [
-    "import cognee\n",
-    "from os import listdir, path\n",
-    "\n",
-    "data_path = path.abspath(\".data\")\n",
-    "\n",
-    "results = await cognee.add([job_1, job_2,job_3,job_4,job_5,job_position], \"example\")\n",
-    "\n",
-    "for result in results:\n",
-    "    print(result)"
-   ]
-  },
- {
-  "cell_type": "code",
-  "execution_count": 8,
-  "id": "6f9b564de121713d",
-  "metadata": {
-   "ExecuteTime": {
-    "end_time": "2024-09-20T14:02:55.564445Z",
-    "start_time": "2024-09-20T14:02:55.562784Z"
-   }
-  },
-  "outputs": [],
-  "source": []
- },
- {
-  "cell_type": "code",
-  "execution_count": 9,
-  "id": "8911f8bd4f8c440a",
-  "metadata": {
-   "ExecuteTime": {
-    "end_time": "2024-09-20T14:02:56.714408Z",
-    "start_time": "2024-09-20T14:02:56.711812Z"
-   }
-  },
-  "outputs": [],
-  "source": [
-    "# from enum import Enum, auto\n",
-    "# from typing import Optional, List, Union, Dict, Any\n",
-    "# from pydantic import BaseModel, Field\n",
-    "# \n",
-    "# class Node(BaseModel):\n",
-    "#     \"\"\"Node in a knowledge graph.\"\"\"\n",
-    "#     id: str\n",
-    "#     name: str\n",
-    "#     type: str\n",
-    "#     description: str\n",
-    "#     properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the node.\")\n",
-    "# \n",
-    "# class Edge(BaseModel):\n",
-    "#     \"\"\"Edge in a knowledge graph.\"\"\"\n",
-    "#     source_node_id: str\n",
-    "#     target_node_id: str\n",
-    "#     relationship_name: str\n",
-    "#     properties: Optional[Dict[str, Any]] = Field(None, description = \"A dictionary of properties associated with the edge.\")\n",
-    "# \n",
-    "# class KnowledgeGraph(BaseModel):\n",
-    "#     \"\"\"Knowledge graph.\"\"\"\n",
-    "#     nodes: List[Node] = Field(..., default_factory=list)\n",
-    "#     edges: List[Edge] = Field(..., default_factory=list)"
+    "await cognee.add([job_1, job_2, job_3, job_4, job_5, job_position], \"example\")\n"
   ]
  },
 {
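The notebook edit tracks the API change described in the commit message: cognee.add used to return per-file results that the cell printed, and it now returns nothing, so awaiting it is the whole cell. A before/after sketch as plain notebook code, assuming job_1 through job_position are text variables defined in earlier cells:

    import cognee

    # Old cell: iterate over returned results.
    # results = await cognee.add([job_1, job_2, job_3, job_4, job_5, job_position], "example")
    # for result in results:
    #     print(result)

    # New cell: cognee.add no longer returns output, so just await it.
    await cognee.add([job_1, job_2, job_3, job_4, job_5, job_position], "example")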
@@ -380,6 +285,21 @@
   },
   "outputs": [],
   "source": [
+    "from cognee.shared.data_models import KnowledgeGraph\n",
+    "from cognee.modules.data.models import Dataset, Data\n",
+    "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
+    "from cognee.modules.pipelines.tasks.Task import Task\n",
+    "from cognee.modules.pipelines import run_tasks\n",
+    "from cognee.modules.users.models import User\n",
+    "from cognee.tasks import chunk_remove_disconnected, \\\n",
+    "    infer_data_ontology, \\\n",
+    "    save_chunks_to_store, \\\n",
+    "    chunk_update_check, \\\n",
+    "    chunks_into_graph, \\\n",
+    "    source_documents_to_chunks, \\\n",
+    "    check_permissions_on_documents, \\\n",
+    "    classify_documents\n",
+    "\n",
    "async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",
    "    data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
    "\n",
@@ -421,8 +341,13 @@
   },
   "outputs": [],
   "source": [
+    "from cognee.modules.users.methods import get_default_user\n",
+    "from cognee.modules.data.methods import get_datasets_by_name\n",
+    "\n",
+    "user = await get_default_user()\n",
+    "\n",
    "datasets = await get_datasets_by_name([\"example\"], user.id)\n",
    "\n",
    "await run_cognify_pipeline(datasets[0], user)"
   ]
  },
@@ -490,7 +415,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": ".venv",
    "language": "python",
    "name": "python3"
   },