diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 409d42548..aac149409 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -1,4 +1,4 @@ -from typing import Union, BinaryIO +from typing import Union, BinaryIO, List, Optional from cognee.modules.users.models import User from cognee.modules.pipelines import Task from cognee.tasks.ingestion import ingest_data, resolve_data_directories @@ -9,8 +9,9 @@ async def add( data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_name: str = "main_dataset", user: User = None, + node_set: Optional[List[str]] = None, ): - tasks = [Task(resolve_data_directories), Task(ingest_data, dataset_name, user)] + tasks = [Task(resolve_data_directories), Task(ingest_data, dataset_name, user, node_set)] await cognee_pipeline( tasks=tasks, datasets=dataset_name, data=data, user=user, pipeline_name="add_pipeline" diff --git a/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py b/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py index 993d659be..112273bc9 100644 --- a/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +++ b/cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py @@ -1,12 +1,11 @@ -from cognee.shared.logging_utils import get_logger -from typing import Dict, List, Optional, Any -import os import json from uuid import UUID - +from typing import List, Optional from chromadb import AsyncHttpClient, Settings from cognee.exceptions import InvalidValueError +from cognee.shared.logging_utils import get_logger +from cognee.modules.storage.utils import get_own_properties from cognee.infrastructure.engine.utils import parse_id from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.databases.vector.models.ScoredResult import ScoredResult @@ -134,7 +133,7 @@ class ChromaDBAdapter(VectorDBInterface): metadatas = [] for data_point in data_points: - metadata = data_point.model_dump() + metadata = get_own_properties(data_point) metadatas.append(process_data_for_chroma(metadata)) await collection.upsert( diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py index bb512596b..48c7dcaa8 100644 --- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py @@ -312,6 +312,13 @@ class LanceDBAdapter(VectorDBInterface): models_list = get_args(field_config.annotation) if any(hasattr(model, "model_fields") for model in models_list): related_models_fields.append(field_name) + # get_args() returns a tuple, so check membership (e.g. list[DataPoint]) rather than identity. + elif models_list and any(DataPoint in get_args(model) for model in models_list): + related_models_fields.append(field_name) + elif models_list and any( + submodel is DataPoint for submodel in get_args(models_list[0]) + ): + related_models_fields.append(field_name) elif get_origin(field_config.annotation) == Optional: model = get_args(field_config.annotation) diff --git a/cognee/infrastructure/engine/models/DataPoint.py b/cognee/infrastructure/engine/models/DataPoint.py index a315f95f1..986b13a0e 100644 --- a/cognee/infrastructure/engine/models/DataPoint.py +++ b/cognee/infrastructure/engine/models/DataPoint.py @@ -1,10 +1,9 @@ -from datetime import datetime, timezone -from typing import Optional, Any, Dict -from uuid import UUID, uuid4 - -from pydantic import BaseModel, Field -from typing_extensions import TypedDict import pickle +from uuid import UUID, uuid4 +from pydantic import BaseModel, Field +from datetime 
import datetime, timezone +from typing_extensions import TypedDict +from typing import Optional, Any, Dict, List # Define metadata type @@ -27,6 +26,7 @@ class DataPoint(BaseModel): topological_rank: Optional[int] = 0 metadata: Optional[MetaData] = {"index_fields": []} type: str = Field(default_factory=lambda: DataPoint.__name__) + belongs_to_set: Optional[List["DataPoint"]] = None def __init__(self, **data): super().__init__(**data) diff --git a/cognee/modules/data/models/Data.py b/cognee/modules/data/models/Data.py index bbfdbed32..422e013df 100644 --- a/cognee/modules/data/models/Data.py +++ b/cognee/modules/data/models/Data.py @@ -20,6 +20,7 @@ class Data(Base): owner_id = Column(UUID, index=True) content_hash = Column(String) external_metadata = Column(JSON) + node_set = Column(JSON, nullable=True) # Store NodeSet as JSON list of strings token_count = Column(Integer) created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) @@ -44,5 +45,6 @@ class Data(Base): "rawDataLocation": self.raw_data_location, "createdAt": self.created_at.isoformat(), "updatedAt": self.updated_at.isoformat() if self.updated_at else None, + "nodeSet": self.node_set, # "datasets": [dataset.to_json() for dataset in self.datasets] } diff --git a/cognee/modules/data/processing/document_types/Document.py b/cognee/modules/data/processing/document_types/Document.py index c75203231..af1be84c2 100644 --- a/cognee/modules/data/processing/document_types/Document.py +++ b/cognee/modules/data/processing/document_types/Document.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, List from cognee.infrastructure.engine import DataPoint from cognee.modules.chunking.Chunker import Chunker diff --git a/cognee/modules/engine/models/__init__.py b/cognee/modules/engine/models/__init__.py index bdbd88f02..4ab2de0de 100644 --- a/cognee/modules/engine/models/__init__.py +++ b/cognee/modules/engine/models/__init__.py @@ -2,3 +2,4 @@ from .Entity import Entity from .EntityType import EntityType from .TableRow import TableRow from .TableType import TableType +from .node_set import NodeSet diff --git a/cognee/modules/engine/models/node_set.py b/cognee/modules/engine/models/node_set.py new file mode 100644 index 000000000..33fe3f557 --- /dev/null +++ b/cognee/modules/engine/models/node_set.py @@ -0,0 +1,8 @@ +from cognee.infrastructure.engine import DataPoint + + +class NodeSet(DataPoint): + """Named set used to group related data points (see DataPoint.belongs_to_set).""" + + name: str + metadata: dict = {"index_fields": ["name"]} diff --git a/cognee/modules/visualization/cognee_network_visualization.py b/cognee/modules/visualization/cognee_network_visualization.py index 86ad7ce49..b0f2f0a1a 100644 --- a/cognee/modules/visualization/cognee_network_visualization.py +++ b/cognee/modules/visualization/cognee_network_visualization.py @@ -1,18 +1,17 @@ -from cognee.shared.logging_utils import get_logger -import networkx as nx -import json import os +import json +import networkx +from cognee.shared.logging_utils import get_logger from cognee.infrastructure.files.storage import LocalStorage - logger = get_logger() async def cognee_network_visualization(graph_data, destination_file_path: str = None): nodes_data, edges_data = graph_data - G = nx.DiGraph() + G = networkx.DiGraph() nodes_list = [] color_map = { @@ -184,8 +183,8 @@ async def cognee_network_visualization(graph_data, destination_file_path: str = """ - html_content = html_template.replace("{nodes}", 
json.dumps(nodes_list, default=str)) - html_content = html_content.replace("{links}", json.dumps(links_list, default=str)) + html_content = html_template.replace("{nodes}", json.dumps(nodes_list)) + html_content = html_content.replace("{links}", json.dumps(links_list)) if not destination_file_path: home_dir = os.path.expanduser("~") diff --git a/cognee/notebooks/github_analysis_step_by_step.ipynb b/cognee/notebooks/github_analysis_step_by_step.ipynb new file mode 100644 index 000000000..54f657bb0 --- /dev/null +++ b/cognee/notebooks/github_analysis_step_by_step.ipynb @@ -0,0 +1,37 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py index 7211ebced..97ff3e483 100644 --- a/cognee/tasks/documents/classify_documents.py +++ b/cognee/tasks/documents/classify_documents.py @@ -8,6 +8,8 @@ from cognee.modules.data.processing.document_types import ( TextDocument, UnstructuredDocument, ) +from cognee.modules.engine.models.node_set import NodeSet +from cognee.modules.engine.utils.generate_node_id import generate_node_id EXTENSION_TO_DOCUMENT_CLASS = { "pdf": PdfDocument, # Text documents @@ -49,6 +51,29 @@ EXTENSION_TO_DOCUMENT_CLASS = { } +def update_node_set(document): + """Populate document.belongs_to_set with NodeSet nodes parsed from the node_set list in external_metadata, if present.""" + try: + external_metadata = json.loads(document.external_metadata) + except json.JSONDecodeError: + return + + if not isinstance(external_metadata, dict): + return + + if "node_set" not in external_metadata: + return + + node_set = external_metadata["node_set"] + if not isinstance(node_set, list): + return + + document.belongs_to_set = [ + NodeSet(id=generate_node_id(f"NodeSet:{node_set_name}"), name=node_set_name) + for node_set_name in node_set + ] + + async def classify_documents(data_documents: list[Data]) -> list[Document]: """ Classifies a list of data items into specific document types based on file extensions. 
@@ -67,6 +92,7 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]: mime_type=data_item.mime_type, external_metadata=json.dumps(data_item.external_metadata, indent=4), ) + update_node_set(document) documents.append(document) return documents diff --git a/cognee/tasks/documents/extract_chunks_from_documents.py b/cognee/tasks/documents/extract_chunks_from_documents.py index a585d519c..08df7fa57 100644 --- a/cognee/tasks/documents/extract_chunks_from_documents.py +++ b/cognee/tasks/documents/extract_chunks_from_documents.py @@ -40,6 +40,7 @@ async def extract_chunks_from_documents( document_token_count = 0 for document_chunk in document.read(max_chunk_size=max_chunk_size, chunker_cls=chunker): document_token_count += document_chunk.chunk_size + document_chunk.belongs_to_set = document.belongs_to_set yield document_chunk await update_document_token_count(document.id, document_token_count) diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index 3608ed4e6..c7c0251ae 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -1,7 +1,8 @@ -from typing import Any, List - import dlt import s3fs +import json +import inspect +from typing import Union, BinaryIO, Any, List, Optional import cognee.modules.ingestion as ingestion from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.data.methods import create_dataset, get_dataset_data, get_datasets_by_name @@ -12,13 +13,13 @@ from cognee.modules.users.permissions.methods import give_permission_on_document from .get_dlt_destination import get_dlt_destination from .save_data_item_to_storage import save_data_item_to_storage -from typing import Union, BinaryIO -import inspect from cognee.api.v1.add.config import get_s3_config -async def ingest_data(data: Any, dataset_name: str, user: User): +async def ingest_data( + data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None +): destination = get_dlt_destination() if not user: @@ -68,9 +69,12 @@ async def ingest_data(data: Any, dataset_name: str, user: User): "mime_type": file_metadata["mime_type"], "content_hash": file_metadata["content_hash"], "owner_id": str(user.id), + "node_set": json.dumps(node_set) if node_set else None, } - async def store_data_to_dataset(data: Any, dataset_name: str, user: User): + async def store_data_to_dataset( + data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None + ): if not isinstance(data, list): # Convert data to a list as we work with lists further down. 
data = [data] @@ -107,6 +111,10 @@ async def ingest_data(data: Any, dataset_name: str, user: User): await session.execute(select(Data).filter(Data.id == data_id)) ).scalar_one_or_none() + ext_metadata = get_external_metadata_dict(data_item) + if node_set: + ext_metadata["node_set"] = node_set + if data_point is not None: data_point.name = file_metadata["name"] data_point.raw_data_location = file_metadata["file_path"] @@ -114,7 +122,8 @@ async def ingest_data(data: Any, dataset_name: str, user: User): data_point.mime_type = file_metadata["mime_type"] data_point.owner_id = user.id data_point.content_hash = file_metadata["content_hash"] - data_point.external_metadata = (get_external_metadata_dict(data_item),) + data_point.external_metadata = ext_metadata + data_point.node_set = json.dumps(node_set) if node_set else None await session.merge(data_point) else: data_point = Data( @@ -125,7 +134,8 @@ async def ingest_data(data: Any, dataset_name: str, user: User): mime_type=file_metadata["mime_type"], owner_id=user.id, content_hash=file_metadata["content_hash"], - external_metadata=get_external_metadata_dict(data_item), + external_metadata=ext_metadata, + node_set=json.dumps(node_set) if node_set else None, token_count=-1, ) @@ -150,7 +160,7 @@ async def ingest_data(data: Any, dataset_name: str, user: User): db_engine = get_relational_engine() - file_paths = await store_data_to_dataset(data, dataset_name, user) + file_paths = await store_data_to_dataset(data, dataset_name, user, node_set) # Note: DLT pipeline has its own event loop, therefore objects created in another event loop # can't be used inside the pipeline diff --git a/examples/python/entity_completion_comparison.py b/examples/python/entity_completion_comparison.py deleted file mode 100644 index 20d311adf..000000000 --- a/examples/python/entity_completion_comparison.py +++ /dev/null @@ -1,162 +0,0 @@ -import cognee -import asyncio -from cognee.shared.logging_utils import get_logger, ERROR - -from cognee.api.v1.search import SearchType -from cognee.modules.retrieval.EntityCompletionRetriever import EntityCompletionRetriever -from cognee.modules.retrieval.context_providers.TripletSearchContextProvider import ( - TripletSearchContextProvider, -) -from cognee.modules.retrieval.context_providers.SummarizedTripletSearchContextProvider import ( - SummarizedTripletSearchContextProvider, -) -from cognee.modules.retrieval.entity_extractors.DummyEntityExtractor import DummyEntityExtractor - -article_1 = """ -Title: The Theory of Relativity: A Revolutionary Breakthrough -Author: Dr. Sarah Chen - -Albert Einstein's theory of relativity fundamentally changed our understanding of space, time, and gravity. Published in 1915, the general theory of relativity describes gravity as a consequence of the curvature of spacetime caused by mass and energy. This groundbreaking work built upon his special theory of relativity from 1905, which introduced the famous equation E=mc². - -Einstein's work at the Swiss Patent Office gave him time to develop these revolutionary ideas. His mathematical framework predicted several phenomena that were later confirmed, including: -- The bending of light by gravity -- The precession of Mercury's orbit -- The existence of black holes - -The theory continues to be tested and validated today, most recently through the detection of gravitational waves by LIGO in 2015, exactly 100 years after its publication. -""" - -article_2 = """ -Title: The Manhattan Project and Its Scientific Director -Author: Prof. Michael Werner - -J. 
Robert Oppenheimer's leadership of the Manhattan Project marked a pivotal moment in scientific history. As scientific director of the Los Alamos Laboratory, he assembled and led an extraordinary team of physicists in the development of the atomic bomb during World War II. - -Oppenheimer's journey to Los Alamos began at Harvard and continued through his groundbreaking work in quantum mechanics and nuclear physics at Berkeley. His expertise in theoretical physics and exceptional leadership abilities made him the ideal candidate to head the secret weapons laboratory. - -Key aspects of his directorship included: -- Recruitment of top scientific talent from across the country -- Integration of theoretical physics with practical engineering challenges -- Development of implosion-type nuclear weapons -- Management of complex security and ethical considerations - -After witnessing the first nuclear test, codenamed Trinity, Oppenheimer famously quoted the Bhagavad Gita: "Now I am become Death, the destroyer of worlds." This moment reflected the profound moral implications of scientific advancement that would shape his later advocacy for international atomic controls. -""" - -article_3 = """ -Title: The Birth of Quantum Physics -Author: Dr. Lisa Martinez - -The early 20th century witnessed a revolutionary transformation in our understanding of the microscopic world. The development of quantum mechanics emerged from the collaborative efforts of numerous brilliant physicists grappling with phenomena that classical physics couldn't explain. - -Key contributors and their insights included: -- Max Planck's discovery of energy quantization (1900) -- Niels Bohr's model of the atom with discrete energy levels (1913) -- Werner Heisenberg's uncertainty principle (1927) -- Erwin Schrödinger's wave equation (1926) -- Paul Dirac's quantum theory of the electron (1928) - -Einstein's 1905 paper on the photoelectric effect, which demonstrated light's particle nature, was a crucial contribution to this field. The Copenhagen interpretation, developed primarily by Bohr and Heisenberg, became the standard understanding of quantum mechanics, despite ongoing debates about its philosophical implications. These foundational developments continue to influence modern physics, from quantum computing to quantum field theory. 
-""" - - -async def main(enable_steps): - # Step 1: Reset data and system state - if enable_steps.get("prune_data"): - await cognee.prune.prune_data() - print("Data pruned.") - - if enable_steps.get("prune_system"): - await cognee.prune.prune_system(metadata=True) - print("System pruned.") - - # Step 2: Add text - if enable_steps.get("add_text"): - text_list = [article_1, article_2, article_3] - for text in text_list: - await cognee.add(text) - print(f"Added text: {text[:50]}...") - - # Step 3: Create knowledge graph - if enable_steps.get("cognify"): - await cognee.cognify() - print("Knowledge graph created.") - - # Step 4: Query insights using our new retrievers - if enable_steps.get("retriever"): - # Common settings - search_settings = { - "top_k": 5, - "collections": ["Entity_name", "TextSummary_text"], - "properties_to_project": ["name", "description", "text"], - } - - # Create both context providers - direct_provider = TripletSearchContextProvider(**search_settings) - summary_provider = SummarizedTripletSearchContextProvider(**search_settings) - - # Create retrievers with different providers - direct_retriever = EntityCompletionRetriever( - extractor=DummyEntityExtractor(), - context_provider=direct_provider, - system_prompt_path="answer_simple_question.txt", - user_prompt_path="context_for_question.txt", - ) - - summary_retriever = EntityCompletionRetriever( - extractor=DummyEntityExtractor(), - context_provider=summary_provider, - system_prompt_path="answer_simple_question.txt", - user_prompt_path="context_for_question.txt", - ) - - query = "What were the early contributions to quantum physics?" - print("\nQuery:", query) - - # Try with direct triplets - print("\n=== Direct Triplets ===") - context = await direct_retriever.get_context(query) - print("\nEntity Context:") - print(context) - - result = await direct_retriever.get_completion(query) - print("\nEntity Completion:") - print(result) - - # Try with summarized triplets - print("\n=== Summarized Triplets ===") - context = await summary_retriever.get_context(query) - print("\nEntity Context:") - print(context) - - result = await summary_retriever.get_completion(query) - print("\nEntity Completion:") - print(result) - - # Compare with standard search - print("\n=== Standard Search ===") - search_results = await cognee.search( - query_type=SearchType.GRAPH_COMPLETION, query_text=query - ) - print(search_results) - - -if __name__ == "__main__": - logger = get_logger(level=ERROR) - - rebuild_kg = True - retrieve = True - steps_to_enable = { - "prune_data": rebuild_kg, - "prune_system": rebuild_kg, - "add_text": rebuild_kg, - "cognify": rebuild_kg, - "retriever": retrieve, - } - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - loop.run_until_complete(main(steps_to_enable)) - finally: - loop.run_until_complete(loop.shutdown_asyncgens()) diff --git a/examples/python/pokemon_datapoints_example.py b/examples/python/pokemon_datapoints_example.py deleted file mode 100644 index e67967529..000000000 --- a/examples/python/pokemon_datapoints_example.py +++ /dev/null @@ -1,208 +0,0 @@ -# Standard library imports -import os -import json -import asyncio -import pathlib -from uuid import uuid5, NAMESPACE_OID -from typing import List, Optional -from pathlib import Path - -import dlt -import requests -import cognee -from cognee.low_level import DataPoint, setup as cognee_setup -from cognee.api.v1.search import SearchType -from cognee.tasks.storage import add_data_points -from cognee.modules.pipelines.tasks.task import Task 
-from cognee.modules.pipelines import run_tasks - - -BASE_URL = "https://pokeapi.co/api/v2/" -os.environ["BUCKET_URL"] = "./.data_storage" -os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "true" - - -# Data Models -class Abilities(DataPoint): - name: str = "Abilities" - metadata: dict = {"index_fields": ["name"]} - - -class PokemonAbility(DataPoint): - name: str - ability__name: str - ability__url: str - is_hidden: bool - slot: int - _dlt_load_id: str - _dlt_id: str - _dlt_parent_id: str - _dlt_list_idx: str - is_type: Abilities - metadata: dict = {"index_fields": ["ability__name"]} - - -class Pokemons(DataPoint): - name: str = "Pokemons" - have: Abilities - metadata: dict = {"index_fields": ["name"]} - - -class Pokemon(DataPoint): - name: str - base_experience: int - height: int - weight: int - is_default: bool - order: int - location_area_encounters: str - species__name: str - species__url: str - cries__latest: str - cries__legacy: str - sprites__front_default: str - sprites__front_shiny: str - sprites__back_default: Optional[str] - sprites__back_shiny: Optional[str] - _dlt_load_id: str - _dlt_id: str - is_type: Pokemons - abilities: List[PokemonAbility] - metadata: dict = {"index_fields": ["name"]} - - -# Data Collection Functions -@dlt.resource(write_disposition="replace") -def pokemon_list(limit: int = 50): - response = requests.get(f"{BASE_URL}pokemon", params={"limit": limit}) - response.raise_for_status() - yield response.json()["results"] - - -@dlt.transformer(data_from=pokemon_list) -def pokemon_details(pokemons): - """Fetches detailed info for each Pokémon""" - for pokemon in pokemons: - response = requests.get(pokemon["url"]) - response.raise_for_status() - yield response.json() - - -# Data Loading Functions -def load_abilities_data(jsonl_abilities): - abilities_root = Abilities() - pokemon_abilities = [] - - for jsonl_ability in jsonl_abilities: - with open(jsonl_ability, "r") as f: - for line in f: - ability = json.loads(line) - ability["id"] = uuid5(NAMESPACE_OID, ability["_dlt_id"]) - ability["name"] = ability["ability__name"] - ability["is_type"] = abilities_root - pokemon_abilities.append(ability) - - return abilities_root, pokemon_abilities - - -def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root): - pokemons = [] - - for jsonl_pokemon in jsonl_pokemons: - with open(jsonl_pokemon, "r") as f: - for line in f: - pokemon_data = json.loads(line) - abilities = [ - ability - for ability in pokemon_abilities - if ability["_dlt_parent_id"] == pokemon_data["_dlt_id"] - ] - pokemon_data["external_id"] = pokemon_data["id"] - pokemon_data["id"] = uuid5(NAMESPACE_OID, str(pokemon_data["id"])) - pokemon_data["abilities"] = [PokemonAbility(**ability) for ability in abilities] - pokemon_data["is_type"] = pokemon_root - pokemons.append(Pokemon(**pokemon_data)) - - return pokemons - - -# Main Application Logic -async def setup_and_process_data(): - """Setup configuration and process Pokemon data""" - # Setup configuration - data_directory_path = str( - pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage")).resolve() - ) - cognee_directory_path = str( - pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system")).resolve() - ) - - cognee.config.data_root_directory(data_directory_path) - cognee.config.system_root_directory(cognee_directory_path) - - # Initialize pipeline and collect data - pipeline = dlt.pipeline( - pipeline_name="pokemon_pipeline", - destination="filesystem", - dataset_name="pokemon_data", - ) - info = 
pipeline.run([pokemon_list, pokemon_details]) - print(info) - - # Load and process data - STORAGE_PATH = Path(".data_storage/pokemon_data/pokemon_details") - jsonl_pokemons = sorted(STORAGE_PATH.glob("*.jsonl")) - if not jsonl_pokemons: - raise FileNotFoundError("No JSONL files found in the storage directory.") - - ABILITIES_PATH = Path(".data_storage/pokemon_data/pokemon_details__abilities") - jsonl_abilities = sorted(ABILITIES_PATH.glob("*.jsonl")) - if not jsonl_abilities: - raise FileNotFoundError("No JSONL files found in the storage directory.") - - # Process data - abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities) - pokemon_root = Pokemons(have=abilities_root) - pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root) - - return pokemons - - -async def pokemon_cognify(pokemons): - """Process Pokemon data with Cognee and perform search""" - # Setup and run Cognee tasks - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - await cognee_setup() - - # tasks = [Task(add_data_points, task_config={"batch_size": 50})] - tasks = [Task(add_data_points)] - - results = run_tasks( - tasks=tasks, - data=pokemons, - dataset_id=uuid5(NAMESPACE_OID, "Pokemon"), - pipeline_name="pokemon_pipeline", - ) - - async for result in results: - print(result) - print("Done") - - # Perform search - search_results = await cognee.search( - query_type=SearchType.GRAPH_COMPLETION, query_text="pokemons?" - ) - - print("Search results:") - for result_text in search_results: - print(result_text) - - -async def main(): - pokemons = await setup_and_process_data() - await pokemon_cognify(pokemons) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/python/simple_node_set_example.py b/examples/python/simple_node_set_example.py new file mode 100644 index 000000000..471e51f7e --- /dev/null +++ b/examples/python/simple_node_set_example.py @@ -0,0 +1,44 @@ +import os +import asyncio +import cognee +from cognee.api.v1.visualize.visualize import visualize_graph +from cognee.shared.logging_utils import get_logger, ERROR + +text_a = """ + AI is revolutionizing financial services through intelligent fraud detection + and automated customer service platforms. + """ + +text_b = """ + Advances in AI are enabling smarter systems that learn and adapt over time. + """ + +text_c = """ + MedTech startups have seen significant growth in recent years, driven by innovation + in digital health and medical devices. + """ + +node_set_a = ["AI", "FinTech"] +node_set_b = ["AI"] +node_set_c = ["MedTech"] + + +async def main(): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + await cognee.add(text_a, node_set=node_set_a) + await cognee.add(text_b, node_set=node_set_b) + await cognee.add(text_c, node_set=node_set_c) + await cognee.cognify() + + visualization_path = os.path.join( + os.path.dirname(__file__), "./.artifacts/graph_visualization.html" + ) + await visualize_graph(visualization_path) + + +if __name__ == "__main__": + logger = get_logger(level=ERROR) + # asyncio.run() creates and manages its own event loop + asyncio.run(main()) diff --git a/poetry.lock b/poetry.lock index 5879918aa..e5e6a1291 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. 
[[package]] name = "aiobotocore" @@ -451,7 +451,7 @@ description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -608,7 +608,7 @@ description = "Backport of CPython tarfile module" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"deepeval\" and python_version <= \"3.11\"" +markers = "extra == \"deepeval\" and python_version < \"3.12\"" files = [ {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"}, {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"}, @@ -1226,7 +1226,7 @@ files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "(platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"chromadb\" or extra == \"llama-index\" or extra == \"deepeval\") and (os_name == \"nt\" or platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\")", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} +markers = {main = "(platform_system == \"Windows\" or sys_platform == \"win32\" or os_name == \"nt\" or extra == \"llama-index\" or extra == \"deepeval\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"chromadb\" or extra == \"llama-index\" or extra == \"deepeval\")", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""} [[package]] name = "coloredlogs" @@ -1235,7 +1235,7 @@ description = "Colored terminal output for Python's logging module" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "(extra == \"codegraph\" or extra == \"chromadb\") and (python_version <= \"3.12\" or extra == \"chromadb\")" files = [ {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, @@ -1849,7 +1849,7 @@ description = "Python datetimes made easy" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version >= \"3.13\"" +markers = "python_full_version == \"3.13.0\"" files = [ {file = "dlt_pendulum-3.0.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9ae1222828474f9e4743f8929f8026abe2d0b3a99427a483da2868690b017332"}, {file = "dlt_pendulum-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:75e1b758f88f887706902438fa5b293f11cec5d656c6540c9957da8c9b953198"}, @@ -2032,7 +2032,7 @@ description = "Backport of PEP 654 
(exception groups)" optional = false python-versions = ">=3.7" groups = ["main", "dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -2110,30 +2110,30 @@ standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "htt [[package]] name = "fastapi-users" -version = "14.0.0" +version = "14.0.1" description = "Ready-to-use and customizable users management for FastAPI" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "fastapi_users-14.0.0-py3-none-any.whl", hash = "sha256:e1230e044ddc2209b890b5b5c6fc1d13def961298d40e01c2d28f8bc2fe8c4c7"}, - {file = "fastapi_users-14.0.0.tar.gz", hash = "sha256:6dceefbd2db87a17f791ef431d616bb5ce40cb123da7922969b704cbee5e7384"}, + {file = "fastapi_users-14.0.1-py3-none-any.whl", hash = "sha256:074df59676dccf79412d2880bdcb661ab1fabc2ecec1f043b4e6a23be97ed9e1"}, + {file = "fastapi_users-14.0.1.tar.gz", hash = "sha256:8c032b3a75c6fb2b1f5eab8ffce5321176e9916efe1fe93e7c15ee55f0b02236"}, ] [package.dependencies] email-validator = ">=1.1.0,<2.3" fastapi = ">=0.65.2" -fastapi-users-db-sqlalchemy = {version = ">=6.0.0", optional = true, markers = "extra == \"sqlalchemy\""} +fastapi-users-db-sqlalchemy = {version = ">=7.0.0", optional = true, markers = "extra == \"sqlalchemy\""} makefun = ">=1.11.2,<2.0.0" pwdlib = {version = "0.2.1", extras = ["argon2", "bcrypt"]} -pyjwt = {version = "2.9.0", extras = ["crypto"]} -python-multipart = "0.0.17" +pyjwt = {version = "2.10.1", extras = ["crypto"]} +python-multipart = "0.0.20" [package.extras] -beanie = ["fastapi-users-db-beanie (>=3.0.0)"] +beanie = ["fastapi-users-db-beanie (>=4.0.0)"] oauth = ["httpx-oauth (>=0.13)"] redis = ["redis (>=4.3.3,<6.0.0)"] -sqlalchemy = ["fastapi-users-db-sqlalchemy (>=6.0.0)"] +sqlalchemy = ["fastapi-users-db-sqlalchemy (>=7.0.0)"] [[package]] name = "fastapi-users-db-sqlalchemy" @@ -2158,7 +2158,7 @@ description = "Fast, light, accurate library built for retrieval embedding gener optional = true python-versions = ">=3.9.0" groups = ["main"] -markers = "python_version < \"3.13\" and extra == \"codegraph\"" +markers = "extra == \"codegraph\" and python_version <= \"3.12\"" files = [ {file = "fastembed-0.6.0-py3-none-any.whl", hash = "sha256:a08385e9388adea0529a586004f2d588c9787880a510e4e5d167127a11e75328"}, {file = "fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733"}, @@ -2230,7 +2230,7 @@ description = "The FlatBuffers serialization format for Python" optional = true python-versions = "*" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "(extra == \"codegraph\" or extra == \"chromadb\") and (python_version <= \"3.12\" or extra == \"chromadb\")" files = [ {file = "flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051"}, {file = "flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e"}, @@ -2704,7 +2704,7 @@ 
description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"chromadb\" or extra == \"deepeval\" or python_version < \"3.11\" and (extra == \"chromadb\" or extra == \"deepeval\" or extra == \"gemini\")" +markers = "extra == \"gemini\" or extra == \"chromadb\" or extra == \"deepeval\" or python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"deepeval\" or extra == \"gemini\")" files = [ {file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"}, {file = "googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"}, @@ -2909,7 +2909,7 @@ description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"gemini\" or extra == \"chromadb\" or extra == \"deepeval\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"milvus\" or python_version < \"3.11\" and (extra == \"chromadb\" or extra == \"deepeval\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"milvus\" or extra == \"gemini\")" +markers = "extra == \"gemini\" or extra == \"chromadb\" or extra == \"deepeval\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"milvus\" or python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"deepeval\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"milvus\" or extra == \"gemini\")" files = [ {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, {file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"}, @@ -3369,7 +3369,7 @@ description = "Human friendly output for text interfaces using Python" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "(extra == \"codegraph\" or extra == \"chromadb\") and (python_version <= \"3.12\" or extra == \"chromadb\")" files = [ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, @@ -3570,7 +3570,7 @@ description = "IPython: Productive Interactive Computing" optional = true python-versions = ">=3.10" groups = ["main", "dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "ipython-8.35.0-py3-none-any.whl", hash = "sha256:e6b7470468ba6f1f0a7b116bb688a3ece2f13e2f94138e508201fad677a788ba"}, {file = "ipython-8.35.0.tar.gz", hash = "sha256:d200b7d93c3f5883fc36ab9ce28a18249c7706e51347681f80a0aef9895f2520"}, @@ -3610,7 +3610,7 @@ description = "IPython: Productive Interactive Computing" optional = true python-versions = ">=3.11" groups = ["main", "dev"] -markers = "python_version >= \"3.11\"" +markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" files = [ {file = "ipython-9.1.0-py3-none-any.whl", 
hash = "sha256:2df07257ec2f84a6b346b8d83100bcf8fa501c6e01ab75cd3799b0bb253b3d2a"}, {file = "ipython-9.1.0.tar.gz", hash = "sha256:a47e13a5e05e02f3b8e1e7a0f9db372199fe8c3763532fe7a1e0379e4e135f16"}, @@ -3644,7 +3644,7 @@ description = "Defines a variety of Pygments lexers for highlighting IPython cod optional = true python-versions = ">=3.8" groups = ["main", "dev"] -markers = "python_version >= \"3.11\"" +markers = "python_version >= \"3.11\" and python_version < \"3.13\" or python_full_version == \"3.13.0\"" files = [ {file = "ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c"}, {file = "ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81"}, @@ -5042,7 +5042,7 @@ description = "Python logging made (stupidly) simple" optional = true python-versions = "<4.0,>=3.5" groups = ["main"] -markers = "python_version < \"3.13\" and extra == \"codegraph\"" +markers = "extra == \"codegraph\" and python_version <= \"3.12\"" files = [ {file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"}, {file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"}, @@ -5722,7 +5722,7 @@ description = "Python extension for MurmurHash (MurmurHash3), a set of fast and optional = true python-versions = ">=3.9" groups = ["main"] -markers = "(extra == \"codegraph\" or extra == \"chromadb\") and python_version < \"3.13\" or extra == \"chromadb\"" +markers = "(extra == \"codegraph\" or extra == \"chromadb\") and (python_version <= \"3.12\" or extra == \"chromadb\")" files = [ {file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eaf4ac5c6ee18ca9232238364d7f2a213278ae5ca97897cafaa123fcc7bb8bec"}, {file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:48f9aa8ccb9ad1d577a16104834ac44ff640d8de8c0caed09a2300df7ce8460a"}, @@ -5848,7 +5848,7 @@ description = "Python library for arbitrary-precision floating-point arithmetic" optional = true python-versions = "*" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "(extra == \"codegraph\" or extra == \"chromadb\") and (python_version <= \"3.12\" or extra == \"chromadb\")" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -6423,7 +6423,7 @@ description = "ONNX Runtime is a runtime accelerator for Machine Learning models optional = true python-versions = ">=3.10" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "(extra == \"codegraph\" or extra == \"chromadb\") and (python_version <= \"3.12\" or extra == \"chromadb\")" files = [ {file = "onnxruntime-1.21.0-cp310-cp310-macosx_13_0_universal2.whl", hash = 
"sha256:95513c9302bc8dd013d84148dcf3168e782a80cdbf1654eddc948a23147ccd3d"}, {file = "onnxruntime-1.21.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:635d4ab13ae0f150dd4c6ff8206fd58f1c6600636ecc796f6f0c42e4c918585b"}, @@ -6896,8 +6896,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -6993,7 +6993,7 @@ description = "Python datetimes made easy" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version < \"3.13\"" +markers = "python_version <= \"3.12\"" files = [ {file = "pendulum-3.0.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2cf9e53ef11668e07f73190c805dbdf07a1939c3298b78d5a9203a86775d1bfd"}, {file = "pendulum-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fb551b9b5e6059377889d2d878d940fd0bbb80ae4810543db18e6f77b02c5ef6"}, @@ -7531,7 +7531,7 @@ description = "" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.10\" and extra == \"codegraph\" or (extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"gemini\" or extra == \"milvus\") and python_version < \"3.11\" or (python_version == \"3.12\" or extra == \"gemini\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"chromadb\" or extra == \"deepeval\" or extra == \"milvus\") and (extra == \"codegraph\" or extra == \"gemini\" or extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"milvus\") and python_version >= \"3.12\" or python_version == \"3.11\" and (extra == \"codegraph\" or extra == \"gemini\" or extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"milvus\")" +markers = "(python_version <= \"3.12\" or extra == \"gemini\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"chromadb\" or extra == \"deepeval\" or extra == \"milvus\") and (extra == \"codegraph\" or extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"gemini\" or extra == \"deepeval\" or extra == \"milvus\" or python_version == \"3.10\") and (extra == \"codegraph\" or extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"gemini\" or extra == \"milvus\")" files = [ {file = "protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7"}, {file = "protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d"}, @@ -7645,7 +7645,7 @@ description = "Fast and parallel snowball stemmer" optional = true python-versions = "*" groups = ["main"] -markers = "python_version < \"3.13\" and extra == \"codegraph\"" +markers = "extra == \"codegraph\" and python_version <= \"3.12\"" files = [ {file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:bfbd9034ae00419ff2154e33b8f5b4c4d99d1f9271f31ed059e5c7e9fa005844"}, {file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7162ae66df2bb0fc39b350c24a049f5f5151c03c046092ba095c2141ec223a2"}, @@ -7982,14 +7982,14 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = 
"pyjwt" -version = "2.9.0" +version = "2.10.1" description = "JSON Web Token implementation in Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850"}, - {file = "pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c"}, + {file = "PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb"}, + {file = "pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953"}, ] [package.dependencies] @@ -8047,8 +8047,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, + {version = ">=0.3.6", markers = "python_version >= \"3.11\""}, {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, - {version = ">=0.3.6", markers = "python_version == \"3.11\""}, ] isort = ">=4.2.5,<5.13 || >5.13,<7" mccabe = ">=0.6,<0.8" @@ -8189,7 +8189,7 @@ description = "A python implementation of GNU readline." optional = true python-versions = ">=3.8" groups = ["main"] -markers = "sys_platform == \"win32\" and (python_version == \"3.10\" or extra == \"chromadb\" or extra == \"codegraph\") and (extra == \"chromadb\" or python_version == \"3.12\" or python_version == \"3.10\" or python_version == \"3.11\") and (extra == \"codegraph\" or extra == \"chromadb\")" +markers = "sys_platform == \"win32\" and (extra == \"codegraph\" or extra == \"chromadb\") and (python_version <= \"3.12\" or extra == \"chromadb\")" files = [ {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"}, @@ -8462,14 +8462,14 @@ files = [ [[package]] name = "python-multipart" -version = "0.0.17" +version = "0.0.20" description = "A streaming multipart parser for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "python_multipart-0.0.17-py3-none-any.whl", hash = "sha256:15dc4f487e0a9476cc1201261188ee0940165cffc94429b6fc565c4d3045cb5d"}, - {file = "python_multipart-0.0.17.tar.gz", hash = "sha256:41330d831cae6e2f22902704ead2826ea038d0419530eadff3ea80175aec5538"}, + {file = "python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104"}, + {file = "python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13"}, ] [[package]] @@ -8546,7 +8546,7 @@ files = [ {file = "pywin32-310-cp39-cp39-win32.whl", hash = "sha256:851c8d927af0d879221e616ae1f66145253537bbdd321a77e8ef701b443a9a1a"}, {file = "pywin32-310-cp39-cp39-win_amd64.whl", hash = "sha256:96867217335559ac619f00ad70e513c0fcf84b8a3af9fc2bba3b59b97da70475"}, ] -markers = {main = "(extra == \"qdrant\" or extra == \"deepeval\") and platform_system == \"Windows\" or sys_platform == \"win32\"", dev = "sys_platform == \"win32\" and platform_python_implementation != \"PyPy\""} +markers = {main = "(extra == \"qdrant\" or extra == \"deepeval\" or sys_platform == \"win32\") and (platform_system == \"Windows\" or sys_platform == \"win32\")", dev = "sys_platform == \"win32\" and platform_python_implementation != \"PyPy\""} [[package]] name = 
"pywin32-ctypes" @@ -8783,7 +8783,7 @@ description = "Client library for the Qdrant vector search engine" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "python_version >= \"3.13\" and extra == \"qdrant\"" +markers = "python_full_version == \"3.13.0\" and extra == \"qdrant\"" files = [ {file = "qdrant_client-1.12.1-py3-none-any.whl", hash = "sha256:b2d17ce18e9e767471368380dd3bbc4a0e3a0e2061fedc9af3542084b48451e0"}, {file = "qdrant_client-1.12.1.tar.gz", hash = "sha256:35e8e646f75b7b883b3d2d0ee4c69c5301000bba41c82aa546e985db0f1aeb72"}, @@ -8809,7 +8809,7 @@ description = "Client library for the Qdrant vector search engine" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "python_version < \"3.13\" and extra == \"qdrant\"" +markers = "extra == \"qdrant\" and python_version <= \"3.12\"" files = [ {file = "qdrant_client-1.13.3-py3-none-any.whl", hash = "sha256:f52cacbb936e547d3fceb1aaed3e3c56be0ebfd48e8ea495ea3dbc89c671d1d2"}, {file = "qdrant_client-1.13.3.tar.gz", hash = "sha256:61ca09e07c6d7ac0dfbdeb13dca4fe5f3e08fa430cb0d74d66ef5d023a70adfc"}, @@ -10208,7 +10208,7 @@ description = "Computer algebra system (CAS) in Python" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.10\" and (extra == \"chromadb\" or extra == \"codegraph\") or extra == \"chromadb\" or python_version == \"3.12\" and (extra == \"chromadb\" or extra == \"codegraph\") or python_version == \"3.11\" and (extra == \"chromadb\" or extra == \"codegraph\")" +markers = "(extra == \"codegraph\" or extra == \"chromadb\") and (python_version <= \"3.12\" or extra == \"chromadb\")" files = [ {file = "sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73"}, {file = "sympy-1.13.3.tar.gz", hash = "sha256:b27fd2c6530e0ab39e275fc9b683895367e51d5da91baa8d3d64db2565fec4d9"}, @@ -10393,7 +10393,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["main", "dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -11461,7 +11461,7 @@ description = "A small Python utility to set file creation time on Windows" optional = true python-versions = ">=3.5" groups = ["main"] -markers = "extra == \"codegraph\" and sys_platform == \"win32\" and python_version < \"3.13\"" +markers = "sys_platform == \"win32\" and extra == \"codegraph\" and python_version <= \"3.12\"" files = [ {file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"}, {file = "win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0"}, @@ -11743,4 +11743,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "cef734016cd8fc4430277b30c639d0699361f0553c00fdd8fa0fbadbab0b7e7b" +content-hash = "2759286b7a1877f862c794f9f6785afff6715410bb5a9b8c2051900ab2cd5630" diff --git a/pyproject.toml b/pyproject.toml index 99a081e46..903a2653a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,8 @@ pre-commit = "^4.0.1" scikit-learn = "^1.6.1" limits = "^4.4.1" fastapi = {version = "0.115.7"} -fastapi-users = 
{version = "14.0.0", extras = ["sqlalchemy"]} +python-multipart = "0.0.20" +fastapi-users = {version = "14.0.1", extras = ["sqlalchemy"]} uvicorn = {version = "0.34.0", optional = true} gunicorn = {version = "^20.1.0", optional = true} dlt = {extras = ["sqlalchemy"], version = "^1.9.0"}