diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index ac8dd5189..0c9b5a653 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -72,13 +72,13 @@ jobs: - name: Install dependencies run: poetry install --no-interaction - # - name: Build with Poetry - # run: poetry build - - # - name: Install Package - # run: | - # cd dist - # pip install *.whl + # - name: Build with Poetry + # run: poetry build + # + # - name: Install Package + # run: | + # cd dist + # pip install *.whl # - name: Download NLTK Punkt Tokenizer Models # run: | diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 32fcd9285..1f084a75e 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -126,6 +126,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi print(f"Chunk ({chunk_id}) classified.") + print("document_id", document_id) + content_summary = await get_content_summary(input_text) await add_summary_nodes(graph_client, document_id, content_summary) @@ -171,16 +173,16 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi if __name__ == "__main__": async def test(): - - from cognee.api.v1.add import add - - await add(["A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification"], "test") - - graph = await cognify() + # + # from cognee.api.v1.add import add + # + # await add(["A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification"], "code") + # + # graph = await cognify() from cognee.utils import render_graph - await render_graph(graph, include_color=True, include_nodes=True, include_size=True) + await render_graph(graph, include_color=True, include_nodes=False, include_size=False) import asyncio asyncio.run(test()) diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 99ffb6b93..af8a7b728 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -18,6 +18,9 @@ class SearchType(Enum): CATEGORIES = 'CATEGORIES' NEIGHBOR = 'NEIGHBOR' SUMMARY = 'SUMMARY' + SUMMARY_CLASSIFICATION = 'SUMMARY_CLASSIFICATION' + NODE_CLASSIFICATION = 'NODE_CLASSIFICATION' + DOCUMENT_CLASSIFICATION = 'DOCUMENT_CLASSIFICATION' @staticmethod def from_str(name: str): diff --git a/cognee/infrastructure/databases/graph/networkx/adapter.py b/cognee/infrastructure/databases/graph/networkx/adapter.py index fc01795d5..d5a8da0b0 100644 --- a/cognee/infrastructure/databases/graph/networkx/adapter.py +++ b/cognee/infrastructure/databases/graph/networkx/adapter.py @@ -41,6 +41,9 @@ class NetworkXAdapter(GraphDBInterface): ) -> None: self.graph.add_nodes_from(nodes) await self.save_graph_to_file(self.filename) + + async def get_graph(self): + return self.graph async def add_edge( self, diff --git a/cognee/infrastructure/llm/prompts/categorize_categories.txt b/cognee/infrastructure/llm/prompts/categorize_categories.txt new file mode 100644 index 000000000..4b14f59bb --- /dev/null +++ b/cognee/infrastructure/llm/prompts/categorize_categories.txt @@ -0,0 +1,2 @@ +Choose the category that is the most relevant to the query `{{ query }}` +Here are the categories: `{{ categories }}` \ No newline at end of file diff --git
a/cognee/infrastructure/llm/prompts/categorize_summary.txt b/cognee/infrastructure/llm/prompts/categorize_summary.txt new file mode 100644 index 000000000..d5e77da36 --- /dev/null +++ b/cognee/infrastructure/llm/prompts/categorize_summary.txt @@ -0,0 +1,2 @@ +Choose the summary that is the most relevant to the query `{{ query }}` +Here are the summaries: `{{ summaries }}` \ No newline at end of file diff --git a/cognee/modules/cognify/graph/add_summary_nodes.py b/cognee/modules/cognify/graph/add_summary_nodes.py index 0ea8dd7ca..89e4666a2 100644 --- a/cognee/modules/cognify/graph/add_summary_nodes.py +++ b/cognee/modules/cognify/graph/add_summary_nodes.py @@ -7,6 +7,7 @@ async def add_summary_nodes(graph_client, document_id, summary): summary_node_id, dict( name = "Summary", + document_id = document_id, summary = summary["summary"], ), ) @@ -20,6 +21,7 @@ async def add_summary_nodes(graph_client, document_id, summary): description_node_id, dict( name = "Description", + document_id = document_id, description = summary["description"], ), ) diff --git a/cognee/modules/search/graph/search_categories.py b/cognee/modules/search/graph/search_categories.py index ce96a4b35..87096ad9e 100644 --- a/cognee/modules/search/graph/search_categories.py +++ b/cognee/modules/search/graph/search_categories.py @@ -1,11 +1,20 @@ -from typing import Union, Dict +from typing import Union, Dict +import re + +from cognee.modules.search.llm.extraction.categorize_relevant_category import categorize_relevant_category """ Search categories in the graph and return their summary attributes. """ -from cognee.shared.data_models import GraphDBType +from cognee.shared.data_models import GraphDBType, DefaultContentPrediction import networkx as nx -async def search_categories(graph: Union[nx.Graph, any], query_label: str, infrastructure_config: Dict): +def strip_exact_regex(s, substring): + # Escape the substring so it can be used safely inside a regex pattern + pattern = re.escape(substring) + # Strip the exact substring when it appears at the start or end + return re.sub(f"^{pattern}|{pattern}$", "", s) + +async def search_categories(query: str, graph: Union[nx.Graph, any], query_label: str, infrastructure_config: Dict): """ Filter nodes in the graph that contain the specified label and return their summary attributes. This function supports both NetworkX graphs and Neo4j graph databases.
@@ -22,8 +31,25 @@ async def search_categories(graph: Union[nx.Graph, any], query_label: str, infra """ # Determine which client is in use based on the configuration if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: - # Logic for NetworkX - return {node: data.get('content_labels') for node, data in graph.nodes(data=True) if query_label in node and 'content_labels' in data} + + categories_and_ids = [ + {'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']} + for _, data in graph.nodes(data=True) + if 'summary' in data + ] + print("categories_and_ids", categories_and_ids) + check_relevant_category = await categorize_relevant_category(query, categories_and_ids, response_model=infrastructure_config.get_config()["classification_model"]) + print("check_relevant_category", check_relevant_category) + + connected_nodes = list(graph.neighbors(check_relevant_category['document_id'])) + print("connected_nodes", connected_nodes) + descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes} + print("descs", descriptions) + return descriptions + + # + # # Logic for NetworkX + # return {node: data.get('content_labels') for node, data in graph.nodes(data=True) if query_label in node and 'content_labels' in data} elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: # Logic for Neo4j diff --git a/cognee/modules/search/graph/search_summary.py b/cognee/modules/search/graph/search_summary.py index be8f3f3e8..4618fc04e 100644 --- a/cognee/modules/search/graph/search_summary.py +++ b/cognee/modules/search/graph/search_summary.py @@ -3,9 +3,20 @@ from typing import Union, Dict import networkx as nx -from cognee.shared.data_models import GraphDBType +from cognee.infrastructure import infrastructure_config -async def search_summary(graph: Union[nx.Graph, any], query: str, infrastructure_config: Dict, other_param: str = None) -> Dict[str, str]: +from cognee.modules.search.llm.extraction.categorize_relevant_summary import categorize_relevant_summary +from cognee.shared.data_models import GraphDBType, ResponseSummaryModel + +import re + +def strip_exact_regex(s, substring): + # Escape the substring so it can be used safely inside a regex pattern + pattern = re.escape(substring) + # Strip the exact substring when it appears at the start or end + return re.sub(f"^{pattern}|{pattern}$", "", s) + +async def search_summary(query: str, graph: Union[nx.Graph, any]) -> Dict[str, str]: """ Filter nodes based on a condition (such as containing 'SUMMARY' in their identifiers) and return their summary attributes. Supports both NetworkX graphs and Neo4j graph databases based on the configuration. @@ -19,8 +30,24 @@ async def search_summary(graph: Union[nx.Graph, any], query: str, infrastructure Returns: - Dict[str, str]: A dictionary where keys are node identifiers containing the query string, and values are their 'summary' attributes.
""" + if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: - return {node: data.get('summary') for node, data in graph.nodes(data=True) if query in node and 'summary' in data} + print("graph", graph) + summaries_and_ids = [ + {'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']} + for _, data in graph.nodes(data=True) + if 'summary' in data + ] + print("summaries_and_ids", summaries_and_ids) + check_relevant_summary = await categorize_relevant_summary(query, summaries_and_ids, response_model=ResponseSummaryModel) + print("check_relevant_summary", check_relevant_summary) + + connected_nodes = list(graph.neighbors(check_relevant_summary['document_id'])) + print("connected_nodes", connected_nodes) + descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes} + print("descs", descriptions) + return descriptions + elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: cypher_query = f""" diff --git a/cognee/modules/search/llm/extraction/categorize_relevant_category.py b/cognee/modules/search/llm/extraction/categorize_relevant_category.py new file mode 100644 index 000000000..d49c9f55b --- /dev/null +++ b/cognee/modules/search/llm/extraction/categorize_relevant_category.py @@ -0,0 +1,17 @@ +from typing import Type +from pydantic import BaseModel +from cognee.infrastructure.llm.prompts import render_prompt +from cognee.infrastructure.llm.get_llm_client import get_llm_client + +async def categorize_relevant_category(query: str, summary, response_model: Type[BaseModel]): + llm_client = get_llm_client() + + enriched_query= render_prompt("categorize_category.txt", {"query": query, "categories": summary}) + + print("enriched_query", enriched_query) + + system_prompt = " Choose the relevant categories and return appropriate output based on the model" + + llm_output = await llm_client.acreate_structured_output(enriched_query, system_prompt, response_model) + + return llm_output.model_dump() diff --git a/cognee/modules/search/llm/extraction/categorize_relevant_summary.py b/cognee/modules/search/llm/extraction/categorize_relevant_summary.py new file mode 100644 index 000000000..2d6be6790 --- /dev/null +++ b/cognee/modules/search/llm/extraction/categorize_relevant_summary.py @@ -0,0 +1,17 @@ +from typing import Type +from pydantic import BaseModel +from cognee.infrastructure.llm.prompts import render_prompt +from cognee.infrastructure.llm.get_llm_client import get_llm_client + +async def categorize_relevant_summary(query: str, summary, response_model: Type[BaseModel]): + llm_client = get_llm_client() + + enriched_query= render_prompt("categorize_summary.txt", {"query": query, "summaries": summary}) + + print("enriched_query", enriched_query) + + system_prompt = " Choose the relevant summary and return appropriate output based on the model" + + llm_output = await llm_client.acreate_structured_output(enriched_query, system_prompt, response_model) + + return llm_output.model_dump() diff --git a/cognee/modules/search/llm/get_relevant_summary.py b/cognee/modules/search/llm/get_relevant_summary.py new file mode 100644 index 000000000..a4af4753b --- /dev/null +++ b/cognee/modules/search/llm/get_relevant_summary.py @@ -0,0 +1,17 @@ +import logging +from typing import List, Dict +from cognee.infrastructure import infrastructure_config +from.extraction.categorize_relevant_summary import categorize_relevant_summary + +logger = logging.getLogger(__name__) + +async def 
get_cognitive_layers(content: str, categories: List[Dict]): + try: + return (await categorize_relevant_summary( + content, + categories[0], + infrastructure_config.get_config()["categorize_summary_model"] + )).cognitive_layers + except Exception as error: + logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True) + raise error diff --git a/cognee/shared/data_models.py b/cognee/shared/data_models.py index 034c97224..7eea17cdc 100644 --- a/cognee/shared/data_models.py +++ b/cognee/shared/data_models.py @@ -244,3 +244,10 @@ class DefaultGraphModel(BaseModel): documents: List[Document] = [] default_fields: Optional[Dict[str, Any]] = {} default_relationship: Relationship = Relationship(type = "has_properties") + + +class ResponseSummaryModel(BaseModel): + """ Response summary model and existing document id """ + document_id: str + response_summary: str + diff --git a/notebooks/full_run.ipynb b/notebooks/full_run.ipynb index 250993b38..d6dc41261 100644 --- a/notebooks/full_run.ipynb +++ b/notebooks/full_run.ipynb @@ -2,10 +2,31 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "38135bf7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:NetworkXAdapter:File /Users/vasa/Projects/cognee/.cognee_system/databases/cognee_graph.pkl not found. Initializing an empty graph./Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/dlt/common/configuration/container.py:94: DeprecationWarning: currentThread() is deprecated, use current_thread() instead\n", + " if m := re.match(r\"dlt-pool-(\\d+)-\", threading.currentThread().getName()):\n", + "/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/dlt/common/configuration/container.py:94: DeprecationWarning: getName() is deprecated, get the name attribute instead\n", + " if m := re.match(r\"dlt-pool-(\\d+)-\", threading.currentThread().getName()):\n" + ] + }, + { + "data": { + "text/plain": [ + "[[LoadInfo(pipeline=, metrics={'1714493358.732525': [{'started_at': DateTime(2024, 4, 30, 16, 9, 19, 653744, tzinfo=Timezone('UTC')), 'finished_at': DateTime(2024, 4, 30, 16, 9, 19, 957893, tzinfo=Timezone('UTC'))}]}, destination_type='dlt.destinations.duckdb', destination_displayable_credentials='duckdb:///:external:', destination_name='duckdb', environment=None, staging_type=None, staging_name=None, staging_displayable_credentials=None, destination_fingerprint='', dataset_name='code', loads_ids=['1714493358.732525'], load_packages=[LoadPackageInfo(load_id='1714493358.732525', package_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/loaded/1714493358.732525', state='loaded', schema=Schema file_load_from_filesystem at 13554777360, schema_update={'_dlt_loads': {'name': '_dlt_loads', 'columns': {'load_id': {'name': 'load_id', 'data_type': 'text', 'nullable': False}, 'schema_name': {'name': 'schema_name', 'data_type': 'text', 'nullable': True}, 'status': {'name': 'status', 'data_type': 'bigint', 'nullable': False}, 'inserted_at': {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, 'schema_version_hash': {'name': 'schema_version_hash', 'data_type': 'text', 'nullable': True}}, 'write_disposition': 'skip', 'resource': '_dlt_loads', 'description': 'Created by DLT. 
Tracks completed loads', 'table_format': None}, '_dlt_pipeline_state': {'columns': {'version': {'name': 'version', 'data_type': 'bigint', 'nullable': False}, 'engine_version': {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, 'pipeline_name': {'name': 'pipeline_name', 'data_type': 'text', 'nullable': False}, 'state': {'name': 'state', 'data_type': 'text', 'nullable': False}, 'created_at': {'name': 'created_at', 'data_type': 'timestamp', 'nullable': False}, 'version_hash': {'name': 'version_hash', 'data_type': 'text', 'nullable': True}, '_dlt_load_id': {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, '_dlt_id': {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True}}, 'write_disposition': 'append', 'name': '_dlt_pipeline_state', 'resource': '_dlt_pipeline_state', 'x-normalizer': {'seen-data': True}, 'table_format': None}, 'file_metadata': {'columns': {'id': {'name': 'id', 'nullable': False, 'merge_key': True, 'data_type': 'text'}, 'name': {'name': 'name', 'data_type': 'text', 'nullable': True}, 'file_path': {'name': 'file_path', 'data_type': 'text', 'nullable': True}, 'extension': {'name': 'extension', 'data_type': 'text', 'nullable': True}, 'mime_type': {'name': 'mime_type', 'data_type': 'text', 'nullable': True}, 'keywords': {'name': 'keywords', 'data_type': 'text', 'nullable': True}, '_dlt_load_id': {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, '_dlt_id': {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True}}, 'write_disposition': 'merge', 'name': 'file_metadata', 'resource': 'data_resources', 'x-normalizer': {'seen-data': True}, 'table_format': None}, '_dlt_version': {'name': '_dlt_version', 'columns': {'version': {'name': 'version', 'data_type': 'bigint', 'nullable': False}, 'engine_version': {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, 'inserted_at': {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, 'schema_name': {'name': 'schema_name', 'data_type': 'text', 'nullable': False}, 'version_hash': {'name': 'version_hash', 'data_type': 'text', 'nullable': False}, 'schema': {'name': 'schema', 'data_type': 'text', 'nullable': False}}, 'write_disposition': 'skip', 'resource': '_dlt_version', 'description': 'Created by DLT. 
Tracks schema updates', 'table_format': None}}, completed_at=DateTime(2024, 4, 30, 16, 9, 19, 951047, tzinfo=Timezone('UTC')), jobs={'new_jobs': [], 'failed_jobs': [], 'started_jobs': [], 'completed_jobs': [LoadJobInfo(state='completed_jobs', file_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/loaded/1714493358.732525/completed_jobs/_dlt_pipeline_state.5b1065da97.0.insert_values', file_size=526, created_at=DateTime(2024, 4, 30, 16, 9, 19, 309619, tzinfo=Timezone('UTC')), elapsed=0.6414282321929932, job_file_info=ParsedLoadJobFileName(table_name='_dlt_pipeline_state', file_id='5b1065da97', retry_count=0, file_format='insert_values'), failed_message=None), LoadJobInfo(state='completed_jobs', file_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/loaded/1714493358.732525/completed_jobs/file_metadata.13d63c321b.0.insert_values', file_size=354, created_at=DateTime(2024, 4, 30, 16, 9, 19, 309748, tzinfo=Timezone('UTC')), elapsed=0.6412985324859619, job_file_info=ParsedLoadJobFileName(table_name='file_metadata', file_id='13d63c321b', retry_count=0, file_format='insert_values'), failed_message=None), LoadJobInfo(state='completed_jobs', file_path='/Users/vasa/.dlt/pipelines/file_load_from_filesystem/load/loaded/1714493358.732525/completed_jobs/file_metadata.c6d93b3a58.0.sql', file_size=401, created_at=DateTime(2024, 4, 30, 16, 9, 19, 721255, tzinfo=Timezone('UTC')), elapsed=0.22979211807250977, job_file_info=ParsedLoadJobFileName(table_name='file_metadata', file_id='c6d93b3a58', retry_count=0, file_format='sql'), failed_message=None)]})], first_run=True)]]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from os import path\n", "import cognee\n", @@ -48,32 +69,207 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "44603a2a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['code']\n" + ] + }, + { + "ename": "CatalogException", + "evalue": "Catalog Error: Table with name file_metadata does not exist!\nDid you mean \"code_staging.file_metadata\"?", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mCatalogException\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 12\u001b[0m\n\u001b[1;32m 8\u001b[0m cognee\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39msystem_root_directory(cognee_directory_path)\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(cognee\u001b[38;5;241m.\u001b[39mdatasets\u001b[38;5;241m.\u001b[39mlist_datasets())\n\u001b[0;32m---> 12\u001b[0m train_dataset \u001b[38;5;241m=\u001b[39m \u001b[43mcognee\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdatasets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mshort_stories\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(train_dataset))\n", + "File \u001b[0;32m~/Projects/cognee/cognee/api/v1/datasets/datasets.py:17\u001b[0m, in \u001b[0;36mdatasets.query_data\u001b[0;34m(dataset_name)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;129m@staticmethod\u001b[39m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mquery_data\u001b[39m(dataset_name: 
\u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 16\u001b[0m db \u001b[38;5;241m=\u001b[39m infrastructure_config\u001b[38;5;241m.\u001b[39mget_config(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdatabase_engine\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_files_metadata\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset_name\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Projects/cognee/cognee/infrastructure/databases/relational/duckdb/DuckDBAdapter.py:21\u001b[0m, in \u001b[0;36mDuckDBAdapter.get_files_metadata\u001b[0;34m(self, dataset_name)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_files_metadata\u001b[39m(\u001b[38;5;28mself\u001b[39m, dataset_name: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m---> 21\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_connection\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mas\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mconnection\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mreturn\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSELECT id, name, file_path, extension, mime_type, keywords FROM \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mdataset_name\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.file_metadata;\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_df\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrecords\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Projects/cognee/cognee/infrastructure/databases/relational/duckdb/DuckDBAdapter.py:22\u001b[0m, in \u001b[0;36mDuckDBAdapter.get_files_metadata\u001b[0;34m(self, dataset_name)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_files_metadata\u001b[39m(\u001b[38;5;28mself\u001b[39m, dataset_name: \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_connection() \u001b[38;5;28;01mas\u001b[39;00m connection:\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSELECT id, name, file_path, extension, mime_type, keywords FROM \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mdataset_name\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.file_metadata;\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto_df()\u001b[38;5;241m.\u001b[39mto_dict(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrecords\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mCatalogException\u001b[0m: Catalog Error: Table with name file_metadata does not 
exist!\nDid you mean \"code_staging.file_metadata\"?" + ] + } + ], "source": [ - "from os import path\n", - "import cognee\n", + "# from os import path\n", + "# import cognee\n", "\n", - "data_directory_path = path.abspath(\"../.data\")\n", - "cognee.config.data_root_directory(data_directory_path)\n", + "# data_directory_path = path.abspath(\"../.data\")\n", + "# cognee.config.data_root_directory(data_directory_path)\n", "\n", - "cognee_directory_path = path.abspath(\"../.cognee_system\")\n", - "cognee.config.system_root_directory(cognee_directory_path)\n", + "# cognee_directory_path = path.abspath(\"../.cognee_system\")\n", + "# cognee.config.system_root_directory(cognee_directory_path)\n", "\n", - "print(cognee.datasets.list_datasets())\n", + "# print(cognee.datasets.list_datasets())\n", "\n", - "train_dataset = cognee.datasets.query_data(\"short_stories\")\n", - "print(len(train_dataset))" + "# train_dataset = cognee.datasets.query_data(\"short_stories\")\n", + "# print(len(train_dataset))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "65bfaf09", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:NetworkXAdapter:File /Users/vasa/Projects/cognee/.cognee_system/databases/cognee_graph.pkl not found. Initializing an empty graph.WARNING:NetworkXAdapter:File /Users/vasa/Projects/cognee/.cognee_system/databases/cognee_graph.pkl not found. Initializing an empty graph.ERROR:root:Collection still not found. Creating collection again." + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "050895021b1a44cab961b00c590714ce", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 7 files: 0%| | 0/7 [00:00>\n", + " _warn(f\"unclosed transport {self!r}\", ResourceWarning, source=self)\n", + "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n", + "/opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/selector_events.py:864: ResourceWarning: unclosed transport <_SelectorSocketTransport fd=92 read=idle write=>\n", + " _warn(f\"unclosed transport {self!r}\", ResourceWarning, source=self)\n", + "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n", + "/Users/vasa/Projects/cognee/.venv/lib/python3.11/site-packages/pydantic/main.py:1096: PydanticDeprecatedSince20: The `parse_obj` method is deprecated; use `model_validate` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Node properties: [('constructor_parameters', None), ('from_class', None)]\n", + "Node properties: [('parameters', ['s']), ('return_type', 'int'), ('is_static', False)]\n", + "Node properties: [('is_static', False), ('default_value', '{}')]\n", + "Node properties: [('is_static', False), ('default_value', '0')]\n", + "Node properties: [('is_static', False), ('default_value', '0')]\n", + "Node properties: [('is_static', False), ('default_value', None)]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f1e021c7325f4fe099571713aa4cefed", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 7 files: 0%| | 0/7 [00:00>\n", + " _warn(f\"unclosed transport {self!r}\", ResourceWarning, source=self)\n", + "ResourceWarning: Enable tracemalloc to get the object allocation traceback\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9ee03487aeb94cae97d1b67ea8d239af", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Fetching 7 files: 0%| | 0/7 [00:00" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from os import path\n", "import logging\n", @@ -97,10 +293,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "a514cf38", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Graph is visualized at: https://hub.graphistry.com/graph/graph.html?dataset=bcdf88d11a934508b58e6c7850e73fd3&type=arrow&viztoken=2823d6ff-3dd6-464c-b864-2dcb8f399d79&usertag=1daaf574-pygraphistry-0.33.7&splashAfter=1714419870&info=true\n" + ] + } + ], "source": [ "import networkx as nx\n", "import pandas as pd\n", @@ -141,15 +345,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "e916c484", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "graph MultiDiGraph with 45 nodes and 62 edges\n", + "summaries_and_ids [{'document_id': 'DOCUMENT__d87dfe97f0d55afb9b3bf6cb14f8bb0f', 'Summary': 'Longest Substring Without Repeating Characters'}]\n", + "enriched_query Chose the summary that is the most relevant to the query`Who are French girls?`\n", + "Here are the summaries:`[{'document_id': 'DOCUMENT__d87dfe97f0d55afb9b3bf6cb14f8bb0f', 'Summary': 'Longest Substring Without Repeating Characters'}]`\n", + "check_relevant_summary {'document_id': 'DOCUMENT__d87dfe97f0d55afb9b3bf6cb14f8bb0f', 'response_summary': 'Longest Substring Without Repeating Characters'}\n", + "connected_nodes ['DATA_LABEL_STRING', 'DATA_LABEL_LENGTH', 'DATA_LABEL_SUBSTRING', 'DATA_LABEL_ANSWER', 'DATA_LABEL_BBBBB', 'DATA_LABEL_PWWKEW', 'DATA_LABEL_NOTE', 'DATA_LABEL_PWKE', 'DATA_LABEL_SUBSEQUENCE', 'DATA_LABEL_CLASS', 'DATA_LABEL_OBJECT', 'DATA_LABEL_LENGTHOFLONGESTSUBSTRING', 'DATA_LABEL_TYPE', 'DATA_LABEL_RTYPE', 'DATA_LABEL_MAPSET', 'DATA_SUMMARY__DOCUMENT__d87dfe97f0d55afb9b3bf6cb14f8bb0f', 'DATA_DESCRIPTION__DOCUMENT__d87dfe97f0d55afb9b3bf6cb14f8bb0f', 'COGNITIVE_LAYER__SYNTAX_LAYER', 'COGNITIVE_LAYER__SEMANTIC_LAYER', 'COGNITIVE_LAYER__FUNCTIONAL_LAYER', 'COGNITIVE_LAYER__MODULE_INTERACTION_LAYER', 'COGNITIVE_LAYER__DATA_FLOW_LAYER', 'COGNITIVE_LAYER__CONTROL_FLOW_LAYER', 'COGNITIVE_LAYER__PERFORMANCE_LAYER', 
'COGNITIVE_LAYER__SECURITY_LAYER', 'COGNITIVE_LAYER__DOCUMENTATION_AND_COMMENTS_LAYER', 'COGNITIVE_LAYER__CONVENTIONS_AND_STYLE_LAYER', 'COGNITIVE_LAYER__DEPENDENCY_AND_INTEGRATION_LAYER', 'COGNITIVE_LAYER__VERSION_CONTROL_AND_HISTORY_LAYER', 'COGNITIVE_LAYER__TEST_AND_VERIFICATION_LAYER', 'COGNITIVE_LAYER__LICENSE_AND_COPYRIGHT_LAYER', 'COGNITIVE_LAYER__PLATFORM_AND_ENVIRONMENT_LAYER']\n", + "descs {'DATA_LABEL_STRING': 'No desc available', 'DATA_LABEL_LENGTH': 'No desc available', 'DATA_LABEL_SUBSTRING': 'No desc available', 'DATA_LABEL_ANSWER': 'No desc available', 'DATA_LABEL_BBBBB': 'No desc available', 'DATA_LABEL_PWWKEW': 'No desc available', 'DATA_LABEL_NOTE': 'No desc available', 'DATA_LABEL_PWKE': 'No desc available', 'DATA_LABEL_SUBSEQUENCE': 'No desc available', 'DATA_LABEL_CLASS': 'No desc available', 'DATA_LABEL_OBJECT': 'No desc available', 'DATA_LABEL_LENGTHOFLONGESTSUBSTRING': 'No desc available', 'DATA_LABEL_TYPE': 'No desc available', 'DATA_LABEL_RTYPE': 'No desc available', 'DATA_LABEL_MAPSET': 'No desc available', 'DATA_SUMMARY__DOCUMENT__d87dfe97f0d55afb9b3bf6cb14f8bb0f': 'No desc available', 'DATA_DESCRIPTION__DOCUMENT__d87dfe97f0d55afb9b3bf6cb14f8bb0f': \"The task is to find the length of the longest substring in a given string that does not contain any repeating characters. Examples include finding such substrings in 'abcabcbb' (resulting in 'abc' with length 3), 'bbbbb' (resulting in 'b' with length 1), and 'pwwkew' (resulting in 'wke' with length 3), emphasizing that substrings are different from subsequences. The provided Python class 'Solution' includes a method 'lengthOfLongestSubstring' that uses a hash map to track characters and their indices, updating start and result variables to calculate the maximum length of such substrings without repetitions.\", 'COGNITIVE_LAYER__SYNTAX_LAYER': 'This layer deals with the syntactic structure of the source code including tokens, keywords, operators, and control structures, which are essential for understanding its grammatical correctness.', 'COGNITIVE_LAYER__SEMANTIC_LAYER': 'This layer addresses the meanings of individual instructions and the functions they perform within the code. 
It covers variable declarations, method calls, and data manipulations that carry semantic value.', 'COGNITIVE_LAYER__FUNCTIONAL_LAYER': 'This layer focuses on the algorithmic and logical aspects of the code, assessing how different components interact to fulfill designated tasks and solve specific problems.', 'COGNITIVE_LAYER__MODULE_INTERACTION_LAYER': 'Here, the analysis is on the interaction between different modules, functions, or classes in the source code, illustrating the architectural design and interdependencies.', 'COGNITIVE_LAYER__DATA_FLOW_LAYER': \"This layer examines how data is passed through the system, including variable scopes, parameter passing, and state management, which is crucial for understanding the program's behavior.\", 'COGNITIVE_LAYER__CONTROL_FLOW_LAYER': 'Examining the flow of execution throughout the code (e.g., loops, conditionals, and function calls), this layer is important for understanding the logic and potential execution paths in the program.', 'COGNITIVE_LAYER__PERFORMANCE_LAYER': 'This layer analyzes aspects of the code that affect its performance, such as complexity, optimization, potential bottlenecks, and resource management.', 'COGNITIVE_LAYER__SECURITY_LAYER': 'Focuses on identifying security-related aspects of source code, such as vulnerabilities, security controls, and adherence to secure coding practices.', 'COGNITIVE_LAYER__DOCUMENTATION_AND_COMMENTS_LAYER': \"Includes inline comments, docstrings, and external documentation that provide insights into the developer's intentions, explain complex pieces of code and specify APIs.\", 'COGNITIVE_LAYER__CONVENTIONS_AND_STYLE_LAYER': 'Encompasses coding standards, naming conventions, and formatting that contribute to code readability, maintainability, and consistency across a codebase.', 'COGNITIVE_LAYER__DEPENDENCY_AND_INTEGRATION_LAYER': 'Analyzes external libraries, components, or services that the code interacts with, both at the source level and through build and deployment processes.', 'COGNITIVE_LAYER__VERSION_CONTROL_AND_HISTORY_LAYER': 'Captures changes, version history, collaboration, and branch management within version control systems to understand the development and evolution of the codebase.', 'COGNITIVE_LAYER__TEST_AND_VERIFICATION_LAYER': \"This layer includes test scripts, test cases, and the overall testing strategy implemented to verify the code's functionality and robustness.\", 'COGNITIVE_LAYER__LICENSE_AND_COPYRIGHT_LAYER': 'Deals with legal aspects such as copyright notices, licensing information, and intellectual property concerns related to the source code.', 'COGNITIVE_LAYER__PLATFORM_AND_ENVIRONMENT_LAYER': 'Examines compatibility issues, target runtime environments, and platform-specific considerations that are important for code deployment and execution.'}\n", + "{'DATA_LABEL_STRING': 'No desc available', 'DATA_LABEL_LENGTH': 'No desc available', 'DATA_LABEL_SUBSTRING': 'No desc available', 'DATA_LABEL_ANSWER': 'No desc available', 'DATA_LABEL_BBBBB': 'No desc available', 'DATA_LABEL_PWWKEW': 'No desc available', 'DATA_LABEL_NOTE': 'No desc available', 'DATA_LABEL_PWKE': 'No desc available', 'DATA_LABEL_SUBSEQUENCE': 'No desc available', 'DATA_LABEL_CLASS': 'No desc available', 'DATA_LABEL_OBJECT': 'No desc available', 'DATA_LABEL_LENGTHOFLONGESTSUBSTRING': 'No desc available', 'DATA_LABEL_TYPE': 'No desc available', 'DATA_LABEL_RTYPE': 'No desc available', 'DATA_LABEL_MAPSET': 'No desc available', 'DATA_SUMMARY__DOCUMENT__d87dfe97f0d55afb9b3bf6cb14f8bb0f': 'No desc 
available', 'DATA_DESCRIPTION__DOCUMENT__d87dfe97f0d55afb9b3bf6cb14f8bb0f': \"The task is to find the length of the longest substring in a given string that does not contain any repeating characters. Examples include finding such substrings in 'abcabcbb' (resulting in 'abc' with length 3), 'bbbbb' (resulting in 'b' with length 1), and 'pwwkew' (resulting in 'wke' with length 3), emphasizing that substrings are different from subsequences. The provided Python class 'Solution' includes a method 'lengthOfLongestSubstring' that uses a hash map to track characters and their indices, updating start and result variables to calculate the maximum length of such substrings without repetitions.\", 'COGNITIVE_LAYER__SYNTAX_LAYER': 'This layer deals with the syntactic structure of the source code including tokens, keywords, operators, and control structures, which are essential for understanding its grammatical correctness.', 'COGNITIVE_LAYER__SEMANTIC_LAYER': 'This layer addresses the meanings of individual instructions and the functions they perform within the code. It covers variable declarations, method calls, and data manipulations that carry semantic value.', 'COGNITIVE_LAYER__FUNCTIONAL_LAYER': 'This layer focuses on the algorithmic and logical aspects of the code, assessing how different components interact to fulfill designated tasks and solve specific problems.', 'COGNITIVE_LAYER__MODULE_INTERACTION_LAYER': 'Here, the analysis is on the interaction between different modules, functions, or classes in the source code, illustrating the architectural design and interdependencies.', 'COGNITIVE_LAYER__DATA_FLOW_LAYER': \"This layer examines how data is passed through the system, including variable scopes, parameter passing, and state management, which is crucial for understanding the program's behavior.\", 'COGNITIVE_LAYER__CONTROL_FLOW_LAYER': 'Examining the flow of execution throughout the code (e.g., loops, conditionals, and function calls), this layer is important for understanding the logic and potential execution paths in the program.', 'COGNITIVE_LAYER__PERFORMANCE_LAYER': 'This layer analyzes aspects of the code that affect its performance, such as complexity, optimization, potential bottlenecks, and resource management.', 'COGNITIVE_LAYER__SECURITY_LAYER': 'Focuses on identifying security-related aspects of source code, such as vulnerabilities, security controls, and adherence to secure coding practices.', 'COGNITIVE_LAYER__DOCUMENTATION_AND_COMMENTS_LAYER': \"Includes inline comments, docstrings, and external documentation that provide insights into the developer's intentions, explain complex pieces of code and specify APIs.\", 'COGNITIVE_LAYER__CONVENTIONS_AND_STYLE_LAYER': 'Encompasses coding standards, naming conventions, and formatting that contribute to code readability, maintainability, and consistency across a codebase.', 'COGNITIVE_LAYER__DEPENDENCY_AND_INTEGRATION_LAYER': 'Analyzes external libraries, components, or services that the code interacts with, both at the source level and through build and deployment processes.', 'COGNITIVE_LAYER__VERSION_CONTROL_AND_HISTORY_LAYER': 'Captures changes, version history, collaboration, and branch management within version control systems to understand the development and evolution of the codebase.', 'COGNITIVE_LAYER__TEST_AND_VERIFICATION_LAYER': \"This layer includes test scripts, test cases, and the overall testing strategy implemented to verify the code's functionality and robustness.\", 'COGNITIVE_LAYER__LICENSE_AND_COPYRIGHT_LAYER': 
'Deals with legal aspects such as copyright notices, licensing information, and intellectual property concerns related to the source code.', 'COGNITIVE_LAYER__PLATFORM_AND_ENVIRONMENT_LAYER': 'Examines compatibility issues, target runtime environments, and platform-specific considerations that are important for code deployment and execution.'}\n" + ] + } + ], "source": [ "from os import path\n", "import cognee\n", "from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n", "from cognee.modules.search.vector.search_similarity import search_similarity\n", + "from cognee.modules.search.graph.search_summary import search_summary\n", "\n", "data_directory_path = path.abspath(\"../.data\")\n", "cognee.config.data_root_directory(data_directory_path)\n", @@ -160,12 +380,65 @@ "graph_client = await get_graph_client(GraphDBType.NETWORKX)\n", "graph = graph_client.graph\n", "\n", - "results = await search_similarity(\"Who are French girls?\", graph)\n", + "results = await search_summary(\"Who are French girls?\", graph)\n", "\n", - "for result in results:\n", - " print(\"French girls\" in result)\n", - " print(result)" + "print(results)" ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b2ffa34a-bd42-4556-807d-c32ff82479f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'Conventions and Style Layer',\n", + " 'description': 'Encompasses coding standards, naming conventions, and formatting that contribute to code readability, maintainability, and consistency across a codebase.'}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.nodes['COGNITIVE_LAYER__CONVENTIONS_AND_STYLE_LAYER']" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "59a56a97-051e-4f49-b1e5-985748e057ad", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'results' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Assuming connected_nodes is a list of node IDs you obtained from graph.neighbors()\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Safely fetch summaries, providing a default if 'summary' is not available\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m descriptions \u001b[38;5;241m=\u001b[39m {node: graph\u001b[38;5;241m.\u001b[39mnodes[node]\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msummary\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNo summary available\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m node \u001b[38;5;129;01min\u001b[39;00m \u001b[43mresults\u001b[49m}\n", + "\u001b[0;31mNameError\u001b[0m: name 'results' is not defined" + ] + } + ], + "source": [ + "# Assuming connected_nodes is a list of node IDs you obtained from graph.neighbors()\n", + "\n", + "# Safely fetch summaries, providing a default if 'summary' is not available\n", + "descriptions = {node: graph.nodes[node].get('summary', 'No summary available') for node in connected_nodes}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "497f448c-afc1-4b7e-814f-1ebf55fe510c", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ 
-184,7 +457,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.6" } }, "nbformat": 4, diff --git a/poetry.lock b/poetry.lock index 3711a9966..53879c375 100644 --- a/poetry.lock +++ b/poetry.lock @@ -215,6 +215,17 @@ doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphin test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] trio = ["trio (>=0.23)"] +[[package]] +name = "appdirs" +version = "1.4.4" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +optional = false +python-versions = "*" +files = [ + {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, + {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, +] + [[package]] name = "appnope" version = "0.1.4" @@ -508,33 +519,33 @@ lxml = ["lxml"] [[package]] name = "black" -version = "24.4.1" +version = "24.4.2" description = "The uncompromising code formatter." optional = false python-versions = ">=3.8" files = [ - {file = "black-24.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f7749fd0d97ff9415975a1432fac7df89bf13c3833cea079e55fa004d5f28c0"}, - {file = "black-24.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:859f3cc5d2051adadf8fd504a01e02b0fd866d7549fff54bc9202d524d2e8bd7"}, - {file = "black-24.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59271c9c29dfa97f7fda51f56c7809b3f78e72fd8d2205189bbd23022a0618b6"}, - {file = "black-24.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:5ed9c34cba223149b5a0144951a0f33d65507cf82c5449cb3c35fe4b515fea9a"}, - {file = "black-24.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9dae3ae59d6f2dc93700fd5034a3115434686e66fd6e63d4dcaa48d19880f2b0"}, - {file = "black-24.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5f8698974a81af83283eb47644f2711b5261138d6d9180c863fce673cbe04b13"}, - {file = "black-24.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f404b6e77043b23d0321fb7772522b876b6de737ad3cb97d6b156638d68ce81"}, - {file = "black-24.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:c94e52b766477bdcd010b872ba0714d5458536dc9d0734eff6583ba7266ffd89"}, - {file = "black-24.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:962d9e953872cdb83b97bb737ad47244ce2938054dc946685a4cad98520dab38"}, - {file = "black-24.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b1d8e3b2486b7dd522b1ab2ba1ec4907f0aa8f5e10a33c4271fb331d1d10b70c"}, - {file = "black-24.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed77e214b785148f57e43ca425b6e0850165144aa727d66ac604e56a70bb7825"}, - {file = "black-24.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:4ef4764437d7eba8386689cd06e1fb5341ee0ae2e9e22582b21178782de7ed94"}, - {file = "black-24.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:92b183f8eef5baf7b20a513abcf982ad616f544f593f6688bb2850d2982911f1"}, - {file = "black-24.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:945abd7b3572add997757c94295bb3e73c6ffaf3366b1f26cb2356a4bffd1dc3"}, - {file = "black-24.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db5154b9e5b478031371d8bc41ff37b33855fa223a6cfba456c9b73fb96f77d4"}, - {file = 
"black-24.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:afc84c33c1a9aaf3d73140cee776b4ddf73ff429ffe6b7c56dc1c9c10725856d"}, - {file = "black-24.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0889f4eb8b3bdf8b189e41a71cf0dbb8141a98346cd1a2695dea5995d416e940"}, - {file = "black-24.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5bb0143f175db45a55227eefd63e90849d96c266330ba31719e9667d0d5ec3b9"}, - {file = "black-24.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:713a04a78e78f28ef7e8df7a16fe075670ea164860fcef3885e4f3dffc0184b3"}, - {file = "black-24.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:171959bc879637a8cdbc53dc3fddae2a83e151937a28cf605fd175ce61e0e94a"}, - {file = "black-24.4.1-py3-none-any.whl", hash = "sha256:ecbab810604fe02c70b3a08afd39beb599f7cc9afd13e81f5336014133b4fe35"}, - {file = "black-24.4.1.tar.gz", hash = "sha256:5241612dc8cad5b6fd47432b8bd04db80e07cfbc53bb69e9ae18985063bcb8dd"}, + {file = "black-24.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce"}, + {file = "black-24.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021"}, + {file = "black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaea3008c281f1038edb473c1aa8ed8143a5535ff18f978a318f10302b254063"}, + {file = "black-24.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7768a0dbf16a39aa5e9a3ded568bb545c8c2727396d063bbaf847df05b08cd96"}, + {file = "black-24.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:257d724c2c9b1660f353b36c802ccece186a30accc7742c176d29c146df6e474"}, + {file = "black-24.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bdde6f877a18f24844e381d45e9947a49e97933573ac9d4345399be37621e26c"}, + {file = "black-24.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e151054aa00bad1f4e1f04919542885f89f5f7d086b8a59e5000e6c616896ffb"}, + {file = "black-24.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:7e122b1c4fb252fd85df3ca93578732b4749d9be076593076ef4d07a0233c3e1"}, + {file = "black-24.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:accf49e151c8ed2c0cdc528691838afd217c50412534e876a19270fea1e28e2d"}, + {file = "black-24.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:88c57dc656038f1ab9f92b3eb5335ee9b021412feaa46330d5eba4e51fe49b04"}, + {file = "black-24.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be8bef99eb46d5021bf053114442914baeb3649a89dc5f3a555c88737e5e98fc"}, + {file = "black-24.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:415e686e87dbbe6f4cd5ef0fbf764af7b89f9057b97c908742b6008cc554b9c0"}, + {file = "black-24.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bf10f7310db693bb62692609b397e8d67257c55f949abde4c67f9cc574492cc7"}, + {file = "black-24.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:98e123f1d5cfd42f886624d84464f7756f60ff6eab89ae845210631714f6db94"}, + {file = "black-24.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48a85f2cb5e6799a9ef05347b476cce6c182d6c71ee36925a6c194d074336ef8"}, + {file = "black-24.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:b1530ae42e9d6d5b670a34db49a94115a64596bc77710b1d05e9801e62ca0a7c"}, + {file = "black-24.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:37aae07b029fa0174d39daf02748b379399b909652a806e5708199bd93899da1"}, + {file = "black-24.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:da33a1a5e49c4122ccdfd56cd021ff1ebc4a1ec4e2d01594fef9b6f267a9e741"}, + {file = "black-24.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef703f83fc32e131e9bcc0a5094cfe85599e7109f896fe8bc96cc402f3eb4b6e"}, + {file = "black-24.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:b9176b9832e84308818a99a561e90aa479e73c523b3f77afd07913380ae2eab7"}, + {file = "black-24.4.2-py3-none-any.whl", hash = "sha256:d36ed1124bb81b32f8614555b34cc4259c3fbc7eec17870e8ff8ded335b58d8c"}, + {file = "black-24.4.2.tar.gz", hash = "sha256:c872b53057f000085da66a19c55d68f6f8ddcac2642392ad3a355878406fbd4d"}, ] [package.dependencies] @@ -572,17 +583,17 @@ css = ["tinycss2 (>=1.1.0,<1.3)"] [[package]] name = "boto3" -version = "1.34.91" +version = "1.34.93" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.91-py3-none-any.whl", hash = "sha256:97fac686c47647db4b44e4789317e4aeecd38511d71e84f8d20abe33eb630ff1"}, - {file = "boto3-1.34.91.tar.gz", hash = "sha256:5077917041adaaae15eeca340289547ef905ca7e11516e9bd22d394fb5057d2a"}, + {file = "boto3-1.34.93-py3-none-any.whl", hash = "sha256:b59355bf4a1408563969526f314611dbeacc151cf90ecb22af295dcc4fe18def"}, + {file = "boto3-1.34.93.tar.gz", hash = "sha256:e39516e4ca21612932599819662759c04485d53ca457996a913163da11f052a4"}, ] [package.dependencies] -botocore = ">=1.34.91,<1.35.0" +botocore = ">=1.34.93,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -591,13 +602,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.91" +version = "1.34.93" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.91-py3-none-any.whl", hash = "sha256:4d1b13f2b1c28ce1743b1e5895ae62bb7e67f892b51882164ea19c27a130852b"}, - {file = "botocore-1.34.91.tar.gz", hash = "sha256:93ef7071292a1b2b9fc26537f8ae3a8227da1177969241939ea3fbdb1a1a1d0c"}, + {file = "botocore-1.34.93-py3-none-any.whl", hash = "sha256:6fbd5a53a2adc9b3d4ebd90ae0ede83a91a41d96231f8a5984051f75495f246d"}, + {file = "botocore-1.34.93.tar.gz", hash = "sha256:79d39b0b87e962991c6dd55e78ce15155099f6fb741be88b1b8a456a702cc150"}, ] [package.dependencies] @@ -624,13 +635,13 @@ files = [ [[package]] name = "cairocffi" -version = "1.6.1" +version = "1.7.0" description = "cffi-based cairo bindings for Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "cairocffi-1.6.1-py3-none-any.whl", hash = "sha256:aa78ee52b9069d7475eeac457389b6275aa92111895d78fbaa2202a52dac112e"}, - {file = "cairocffi-1.6.1.tar.gz", hash = "sha256:78e6bbe47357640c453d0be929fa49cd05cce2e1286f3d2a1ca9cbda7efdb8b7"}, + {file = "cairocffi-1.7.0-py3-none-any.whl", hash = "sha256:1f29a8d41dbda4090c0aa33bcdea64f3b493e95f74a43ea107c4a8a7b7f632ef"}, + {file = "cairocffi-1.7.0.tar.gz", hash = "sha256:7761863603894305f3160eca68452f373433ca8745ab7dd445bd2c6ce50dcab7"}, ] [package.dependencies] @@ -638,7 +649,7 @@ cffi = ">=1.1.0" [package.extras] doc = ["sphinx", "sphinx_rtd_theme"] -test = ["flake8", "isort", "numpy", "pikepdf", "pytest"] +test = ["numpy", "pikepdf", "pytest", "ruff"] xcb = ["xcffib (>=1.4.0)"] [[package]] @@ -1138,6 +1149,21 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "dataclasses-json" +version = "0.6.4" +description = "Easily serialize dataclasses to and from JSON." 
+optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "dataclasses_json-0.6.4-py3-none-any.whl", hash = "sha256:f90578b8a3177f7552f4e1a6e535e84293cd5da421fcce0642d49c0d7bdf8df2"}, + {file = "dataclasses_json-0.6.4.tar.gz", hash = "sha256:73696ebf24936560cca79a2430cbc4f3dd23ac7bf46ed17f38e5e5e7657a6377"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "datasets" version = "2.14.7" @@ -1223,6 +1249,41 @@ files = [ {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] +[[package]] +name = "deepeval" +version = "0.21.36" +description = "The open-source evaluation framework for LLMs." +optional = false +python-versions = "*" +files = [ + {file = "deepeval-0.21.36-py3-none-any.whl", hash = "sha256:e1c9fa0a37c74e79eb1c21a98437d04d4be964e9db0cc7ce46d2ff7c190772e6"}, + {file = "deepeval-0.21.36.tar.gz", hash = "sha256:5447512c6e60ec9840c1c11b48f697470b26c718e729e7d48b97887a5db095a3"}, +] + +[package.dependencies] +docx2txt = ">=0.8,<1.0" +importlib-metadata = ">=6.0.2" +langchain = "*" +langchain-core = "*" +langchain-openai = "*" +portalocker = "*" +protobuf = "4.25.1" +pydantic = "*" +pytest = "*" +pytest-repeat = "*" +pytest-xdist = "*" +ragas = "*" +requests = "*" +rich = "*" +sentry-sdk = "*" +tabulate = "*" +tenacity = ">=8.2.3,<8.3.0" +tqdm = "*" +typer = "*" + +[package.extras] +dev = ["black"] + [[package]] name = "defusedxml" version = "0.7.1" @@ -1356,6 +1417,16 @@ files = [ {file = "docstring_parser-0.16.tar.gz", hash = "sha256:538beabd0af1e2db0146b6bd3caa526c35a34d61af9fd2887f3a8a27a739aa6e"}, ] +[[package]] +name = "docx2txt" +version = "0.8" +description = "A pure python-based utility to extract text and images from docx files." 
+optional = false +python-versions = "*" +files = [ + {file = "docx2txt-0.8.tar.gz", hash = "sha256:2c06d98d7cfe2d3947e5760a57d924e3ff07745b379c8737723922e7009236e5"}, +] + [[package]] name = "dspy-ai" version = "2.4.3" @@ -1478,6 +1549,20 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "execnet" +version = "2.1.1" +description = "execnet: rapid multi-Python deployment" +optional = false +python-versions = ">=3.8" +files = [ + {file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"}, + {file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"}, +] + +[package.extras] +testing = ["hatch", "pre-commit", "pytest", "tox"] + [[package]] name = "executing" version = "2.0.1" @@ -2346,22 +2431,22 @@ files = [ [[package]] name = "importlib-metadata" -version = "7.1.0" +version = "6.1.0" description = "Read metadata from Python packages" optional = false -python-versions = ">=3.8" +python-versions = ">=3.7" files = [ - {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"}, - {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"}, + {file = "importlib_metadata-6.1.0-py3-none-any.whl", hash = "sha256:ff80f3b5394912eb1b108fcfd444dc78b7f1f3e16b16188054bd01cb9cb86f09"}, + {file = "importlib_metadata-6.1.0.tar.gz", hash = "sha256:43ce9281e097583d758c2c708c4376371261a02c34682491a8e98352365aad20"}, ] [package.dependencies] zipp = ">=0.5" [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] [[package]] name = "importlib-resources" @@ -2596,6 +2681,20 @@ files = [ {file = "json5-0.9.25.tar.gz", hash = "sha256:548e41b9be043f9426776f05df8635a00fe06104ea51ed24b67f908856e151ae"}, ] +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + [[package]] name = "jsonpath-ng" version = "1.6.1" @@ -2804,13 +2903,13 @@ test = ["jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-jupyter[server] (> [[package]] name = "jupyterlab" -version = "4.1.6" +version = "4.1.8" description = "JupyterLab computational environment" optional = false python-versions = ">=3.8" files = [ - {file = "jupyterlab-4.1.6-py3-none-any.whl", 
hash = "sha256:cf3e862bc10dbf4331e4eb37438634f813c238cfc62c71c640b3b3b2caa089a8"}, - {file = "jupyterlab-4.1.6.tar.gz", hash = "sha256:7935f36ba26eb615183a4f5c2bbca5791b5108ce2a00b5505f8cfd100d53648e"}, + {file = "jupyterlab-4.1.8-py3-none-any.whl", hash = "sha256:c3baf3a2f91f89d110ed5786cd18672b9a357129d4e389d2a0dead15e11a4d2c"}, + {file = "jupyterlab-4.1.8.tar.gz", hash = "sha256:3384aded8680e7ce504fd63b8bb89a39df21c9c7694d9e7dc4a68742cdb30f9b"}, ] [package.dependencies] @@ -2822,7 +2921,7 @@ jinja2 = ">=3.0.3" jupyter-core = "*" jupyter-lsp = ">=2.0.0" jupyter-server = ">=2.4.0,<3" -jupyterlab-server = ">=2.19.0,<3" +jupyterlab-server = ">=2.27.1,<3" notebook-shim = ">=0.2" packaging = "*" tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""} @@ -3017,17 +3116,17 @@ files = [ [[package]] name = "lancedb" -version = "0.6.10" +version = "0.6.11" description = "lancedb" optional = false python-versions = ">=3.8" files = [ - {file = "lancedb-0.6.10-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:c7e10eb5f5fdb22452d7678e9bf3df14e2463331868607067aba4ba19a0f4022"}, - {file = "lancedb-0.6.10-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:3a1548bf3820ff606e4efce840bd0e979206eb1815915fff9f738c1d8e337293"}, - {file = "lancedb-0.6.10-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3874f8aee55c5b4f41b953f4eac28d69c8a9bc10ad992fc7fd2b699a1f4b88ed"}, - {file = "lancedb-0.6.10-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:73c0909ac7ae95d8128d25936fc0ff670cf6d9ae1b44e06ec48f3a60081d61ca"}, - {file = "lancedb-0.6.10-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:750fb23a6eaa7daea241a124be3970447d45ace1c4f894df197e9869d677b6ba"}, - {file = "lancedb-0.6.10-cp38-abi3-win_amd64.whl", hash = "sha256:06a299d91e1d9d2663fea7086efc784bf8643f51ff1532617c4910ec63cd2004"}, + {file = "lancedb-0.6.11-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:79dbc2a79dac7b843e328a3b7eecb1b42f38ad524742111a38efbeaa1f58ddfe"}, + {file = "lancedb-0.6.11-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:bfc6661e1fe5dd75346b4a1980243b4ccecd2b0ad991f8becc6e506a608c2d8a"}, + {file = "lancedb-0.6.11-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0998c2dae676c24b95d492358f80ef6e5100b86335b96bf402af3f28f7ec4f3b"}, + {file = "lancedb-0.6.11-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:b6b289599c4c6a61a70da89d4c3201abec7483d0e03573506014af2dcddfe0c4"}, + {file = "lancedb-0.6.11-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:d789d5a181c4bc42650567cb63ce2772ed902046b08e8e401970df284a4df1c1"}, + {file = "lancedb-0.6.11-cp38-abi3-win_amd64.whl", hash = "sha256:ea12fc94545afb03933d080cce593491e593ab95cbfddf05ab7183bce2bc8d62"}, ] [package.dependencies] @@ -3051,6 +3150,143 @@ docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"] embeddings = ["awscli (>=1.29.57)", "boto3 (>=1.28.57)", "botocore (>=1.31.57)", "cohere", "google-generativeai", "huggingface-hub", "instructorembedding", "open-clip-torch", "openai (>=1.6.1)", "pillow", "sentence-transformers", "torch"] tests = ["aiohttp", "boto3", "duckdb", "pandas (>=1.4)", "polars (>=0.19)", "pytest", "pytest-asyncio", "pytest-mock", "pytz", "tantivy"] +[[package]] +name = "langchain" +version = "0.1.10" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langchain-0.1.10-py3-none-any.whl", hash = "sha256:dcc1c0968b8d946a812155584ecbbeda690c930c3ee27bb5ecc113d954f6cf1a"}, + {file = 
"langchain-0.1.10.tar.gz", hash = "sha256:17951bcd6d74adc74aa081f260ef5514c449488815314420b7e0f8349f15d932"}, +] + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""} +dataclasses-json = ">=0.5.7,<0.7" +jsonpatch = ">=1.33,<2.0" +langchain-community = ">=0.0.25,<0.1" +langchain-core = ">=0.1.28,<0.2" +langchain-text-splitters = ">=0.0.1,<0.1" +langsmith = ">=0.1.0,<0.2.0" +numpy = ">=1,<2" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<9.0.0" + +[package.extras] +azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"] +clarifai = ["clarifai (>=9.1.0)"] +cli = ["typer (>=0.9.0,<0.10.0)"] +cohere = ["cohere (>=4,<5)"] +docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] +embeddings = ["sentence-transformers (>=2,<3)"] +extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "langchain-openai (>=0.0.2,<0.1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] +javascript = ["esprima (>=4.0.1,<5.0.0)"] +llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] +openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"] +qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"] +text-helpers = ["chardet 
(>=5.1.0,<6.0.0)"] + +[[package]] +name = "langchain-community" +version = "0.0.34" +description = "Community contributed LangChain integrations." +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langchain_community-0.0.34-py3-none-any.whl", hash = "sha256:bc13b21a44bbfca01bff8b35c10a26d71485b57c1d284f499b577ba6e1a5d84a"}, + {file = "langchain_community-0.0.34.tar.gz", hash = "sha256:96e9a807d9b4777820df5a970996f6bf3ad5632137bf0f4d863bd832bdeb2b0f"}, +] + +[package.dependencies] +aiohttp = ">=3.8.3,<4.0.0" +dataclasses-json = ">=0.5.7,<0.7" +langchain-core = ">=0.1.45,<0.2.0" +langsmith = ">=0.1.0,<0.2.0" +numpy = ">=1,<2" +PyYAML = ">=5.3" +requests = ">=2,<3" +SQLAlchemy = ">=1.4,<3" +tenacity = ">=8.1.0,<9.0.0" + +[package.extras] +cli = ["typer (>=0.9.0,<0.10.0)"] +extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cloudpickle (>=2.0.0)", "cohere (>=4,<5)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "friendli-client (>=1.2.4,<2.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "httpx-sse (>=0.4.0,<0.5.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.3,<6.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "nvidia-riva-client (>=2.14.0,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "premai (>=0.3.25,<0.4.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pyjwt (>=2.8.0,<3.0.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tidb-vector (>=0.0.3,<1.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "tree-sitter (>=0.20.2,<0.21.0)", "tree-sitter-languages (>=1.8.0,<2.0.0)", "upstash-redis (>=0.15.0,<0.16.0)", "vdms (>=0.0.20,<0.0.21)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"] + +[[package]] +name = "langchain-core" +version = "0.1.46" +description = "Building applications with LLMs through composability" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langchain_core-0.1.46-py3-none-any.whl", hash = 
"sha256:1c0befcd2665dd4aa153318aa9bf729071644b4c179e491769b8e583b4bf7441"}, + {file = "langchain_core-0.1.46.tar.gz", hash = "sha256:17c416349f5c7a9808e70e3725749a3a2df5088f1ecca045c883871aa95f9c9e"}, +] + +[package.dependencies] +jsonpatch = ">=1.33,<2.0" +langsmith = ">=0.1.0,<0.2.0" +packaging = ">=23.2,<24.0" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +tenacity = ">=8.1.0,<9.0.0" + +[package.extras] +extended-testing = ["jinja2 (>=3,<4)"] + +[[package]] +name = "langchain-openai" +version = "0.1.4" +description = "An integration package connecting OpenAI and LangChain" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langchain_openai-0.1.4-py3-none-any.whl", hash = "sha256:a349ada8724921e380aab03ee312568f5ca99adbc806f6878d79ff9cd1d6d353"}, + {file = "langchain_openai-0.1.4.tar.gz", hash = "sha256:1a3220464c270d73ea3987010617789adc2099d4d4740b15c7734ab07e1f054b"}, +] + +[package.dependencies] +langchain-core = ">=0.1.46,<0.2.0" +openai = ">=1.10.0,<2.0.0" +tiktoken = ">=0.5.2,<1" + +[[package]] +name = "langchain-text-splitters" +version = "0.0.1" +description = "LangChain text splitting utilities" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langchain_text_splitters-0.0.1-py3-none-any.whl", hash = "sha256:f5b802f873f5ff6a8b9259ff34d53ed989666ef4e1582e6d1adb3b5520e3839a"}, + {file = "langchain_text_splitters-0.0.1.tar.gz", hash = "sha256:ac459fa98799f5117ad5425a9330b21961321e30bc19a2a2f9f761ddadd62aa1"}, +] + +[package.dependencies] +langchain-core = ">=0.1.28,<0.2.0" + +[package.extras] +extended-testing = ["lxml (>=5.1.0,<6.0.0)"] + +[[package]] +name = "langsmith" +version = "0.1.5" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langsmith-0.1.5-py3-none-any.whl", hash = "sha256:a1811821a923d90e53bcbacdd0988c3c366aff8f4c120d8777e7af8ecda06268"}, + {file = "langsmith-0.1.5.tar.gz", hash = "sha256:aa7a2861aa3d9ae563a077c622953533800466c4e2e539b0d567b84d5fd5b157"}, +] + +[package.dependencies] +pydantic = ">=1,<3" +requests = ">=2,<3" + [[package]] name = "loguru" version = "0.7.2" @@ -3210,6 +3446,25 @@ files = [ {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, ] +[[package]] +name = "marshmallow" +version = "3.21.1" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "marshmallow-3.21.1-py3-none-any.whl", hash = "sha256:f085493f79efb0644f270a9bf2892843142d80d7174bbbd2f3713f2a589dc633"}, + {file = "marshmallow-3.21.1.tar.gz", hash = "sha256:4e65e9e0d80fc9e609574b9983cf32579f305c718afb30d7233ab818571768c3"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["marshmallow[tests]", "pre-commit (>=3.5,<4.0)", "tox"] +docs = ["alabaster (==0.7.16)", "autodocsumm (==0.2.12)", "sphinx (==7.2.6)", "sphinx-issues (==4.0.0)", "sphinx-version-warning (==1.1.2)"] +tests = ["pytest", "pytz", "simplejson"] + [[package]] name = "matplotlib" version = "3.8.4" @@ -3832,12 +4087,12 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] [[package]] name = "neo4j" -version = "5.19.0" +version = "5.20.0" description = "Neo4j Bolt driver for Python" optional = false python-versions = ">=3.7" files = [ - {file = "neo4j-5.19.0.tar.gz", hash = "sha256:23704f604214174f3b7d15a38653a1462809986019dfdaf773ff7ca4e1b9e2de"}, + {file = "neo4j-5.20.0.tar.gz", hash = "sha256:c59e54a0c0fa1f109f1d2fa5293c29c2bb30ba388b4f9dd9656919793c10063a"}, ] [package.dependencies] @@ -4198,13 +4453,13 @@ files = [ [[package]] name = "packaging" -version = "24.0" +version = "23.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, - {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] [[package]] @@ -4665,22 +4920,22 @@ wcwidth = "*" [[package]] name = "protobuf" -version = "4.25.3" +version = "4.25.1" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "protobuf-4.25.3-cp310-abi3-win32.whl", hash = "sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa"}, - {file = "protobuf-4.25.3-cp310-abi3-win_amd64.whl", hash = "sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8"}, - {file = "protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c"}, - {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019"}, - {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d"}, - {file = "protobuf-4.25.3-cp38-cp38-win32.whl", hash = "sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2"}, - {file = "protobuf-4.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4"}, - {file = "protobuf-4.25.3-cp39-cp39-win32.whl", hash = "sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4"}, - {file = "protobuf-4.25.3-cp39-cp39-win_amd64.whl", hash = "sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c"}, - {file = "protobuf-4.25.3-py3-none-any.whl", hash = "sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9"}, - {file = "protobuf-4.25.3.tar.gz", hash = 
"sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"}, + {file = "protobuf-4.25.1-cp310-abi3-win32.whl", hash = "sha256:193f50a6ab78a970c9b4f148e7c750cfde64f59815e86f686c22e26b4fe01ce7"}, + {file = "protobuf-4.25.1-cp310-abi3-win_amd64.whl", hash = "sha256:3497c1af9f2526962f09329fd61a36566305e6c72da2590ae0d7d1322818843b"}, + {file = "protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:0bf384e75b92c42830c0a679b0cd4d6e2b36ae0cf3dbb1e1dfdda48a244f4bcd"}, + {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:0f881b589ff449bf0b931a711926e9ddaad3b35089cc039ce1af50b21a4ae8cb"}, + {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:ca37bf6a6d0046272c152eea90d2e4ef34593aaa32e8873fc14c16440f22d4b7"}, + {file = "protobuf-4.25.1-cp38-cp38-win32.whl", hash = "sha256:abc0525ae2689a8000837729eef7883b9391cd6aa7950249dcf5a4ede230d5dd"}, + {file = "protobuf-4.25.1-cp38-cp38-win_amd64.whl", hash = "sha256:1484f9e692091450e7edf418c939e15bfc8fc68856e36ce399aed6889dae8bb0"}, + {file = "protobuf-4.25.1-cp39-cp39-win32.whl", hash = "sha256:8bdbeaddaac52d15c6dce38c71b03038ef7772b977847eb6d374fc86636fa510"}, + {file = "protobuf-4.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:becc576b7e6b553d22cbdf418686ee4daa443d7217999125c045ad56322dda10"}, + {file = "protobuf-4.25.1-py3-none-any.whl", hash = "sha256:a19731d5e83ae4737bb2a089605e636077ac001d18781b3cf489b9546c7c80d6"}, + {file = "protobuf-4.25.1.tar.gz", hash = "sha256:57d65074b4f5baa4ab5da1605c02be90ac20c8b40fb137d6a8df9f416b0d0ce2"}, ] [[package]] @@ -4999,13 +5254,13 @@ testutils = ["gitpython (>3)"] [[package]] name = "pymdown-extensions" -version = "10.8" +version = "10.8.1" description = "Extension pack for Python Markdown." optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.8-py3-none-any.whl", hash = "sha256:3539003ff0d5e219ba979d2dc961d18fcad5ac259e66c764482e8347b4c0503c"}, - {file = "pymdown_extensions-10.8.tar.gz", hash = "sha256:91ca336caf414e1e5e0626feca86e145de9f85a3921a7bcbd32890b51738c428"}, + {file = "pymdown_extensions-10.8.1-py3-none-any.whl", hash = "sha256:f938326115884f48c6059c67377c46cf631c733ef3629b6eed1349989d1b30cb"}, + {file = "pymdown_extensions-10.8.1.tar.gz", hash = "sha256:3ab1db5c9e21728dabf75192d71471f8e50f216627e9a1fa9535ecb0231b9940"}, ] [package.dependencies] @@ -5061,6 +5316,16 @@ files = [ {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, ] +[[package]] +name = "pysbd" +version = "0.3.4" +description = "pysbd (Python Sentence Boundary Disambiguation) is a rule-based sentence boundary detection that works out-of-the-box across many languages." 
+optional = false +python-versions = ">=3" +files = [ + {file = "pysbd-0.3.4-py3-none-any.whl", hash = "sha256:cd838939b7b0b185fcf86b0baf6636667dfb6e474743beeff878e9f42e022953"}, +] + [[package]] name = "pytest" version = "7.4.4" @@ -5117,6 +5382,40 @@ black = ">=23" pytest = ">=7" ruff = ">=0.0.258" +[[package]] +name = "pytest-repeat" +version = "0.9.3" +description = "pytest plugin for repeating tests" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest_repeat-0.9.3-py3-none-any.whl", hash = "sha256:26ab2df18226af9d5ce441c858f273121e92ff55f5bb311d25755b8d7abdd8ed"}, + {file = "pytest_repeat-0.9.3.tar.gz", hash = "sha256:ffd3836dfcd67bb270bec648b330e20be37d2966448c4148c4092d1e8aba8185"}, +] + +[package.dependencies] +pytest = "*" + +[[package]] +name = "pytest-xdist" +version = "3.5.0" +description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-xdist-3.5.0.tar.gz", hash = "sha256:cbb36f3d67e0c478baa57fa4edc8843887e0f6cfc42d677530a36d7472b32d8a"}, + {file = "pytest_xdist-3.5.0-py3-none-any.whl", hash = "sha256:d075629c7e00b611df89f490a5063944bee7a4362a5ff11c7cc7824a03dfce24"}, +] + +[package.dependencies] +execnet = ">=1.1" +pytest = ">=6.2.0" + +[package.extras] +psutil = ["psutil (>=3.0)"] +setproctitle = ["setproctitle"] +testing = ["filelock"] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -5401,6 +5700,33 @@ urllib3 = ">=1.26.14,<3" [package.extras] fastembed = ["fastembed (==0.2.6)"] +[[package]] +name = "ragas" +version = "0.1.7" +description = "" +optional = false +python-versions = "*" +files = [ + {file = "ragas-0.1.7-py3-none-any.whl", hash = "sha256:abe02b40a8d11842c42e222226901287858beb70203f1227a403a9261d0bb684"}, + {file = "ragas-0.1.7.tar.gz", hash = "sha256:db857262dda63fc01a7eef837cbba166202084b5d535b2e8ad408c63a66f9319"}, +] + +[package.dependencies] +appdirs = "*" +datasets = "*" +langchain = "*" +langchain-community = "*" +langchain-core = "*" +langchain-openai = "*" +nest-asyncio = "*" +numpy = "*" +openai = ">1" +pysbd = ">=0.3.4" +tiktoken = "*" + +[package.extras] +all = ["sentence-transformers"] + [[package]] name = "ratelimiter" version = "1.2.0.post0" @@ -5905,6 +6231,53 @@ nativelib = ["pyobjc-framework-Cocoa", "pywin32"] objc = ["pyobjc-framework-Cocoa"] win32 = ["pywin32"] +[[package]] +name = "sentry-sdk" +version = "2.0.1" +description = "Python client for Sentry (https://sentry.io)" +optional = false +python-versions = ">=3.6" +files = [ + {file = "sentry_sdk-2.0.1-py2.py3-none-any.whl", hash = "sha256:b54c54a2160f509cf2757260d0cf3885b608c6192c2555a3857e3a4d0f84bdb3"}, + {file = "sentry_sdk-2.0.1.tar.gz", hash = "sha256:c278e0f523f6f0ee69dc43ad26dcdb1202dffe5ac326ae31472e012d941bee21"}, +] + +[package.dependencies] +certifi = "*" +urllib3 = ">=1.26.11" + +[package.extras] +aiohttp = ["aiohttp (>=3.5)"] +arq = ["arq (>=0.23)"] +asyncpg = ["asyncpg (>=0.23)"] +beam = ["apache-beam (>=2.12)"] +bottle = ["bottle (>=0.12.13)"] +celery = ["celery (>=3)"] +celery-redbeat = ["celery-redbeat (>=2)"] +chalice = ["chalice (>=1.16.0)"] +clickhouse-driver = ["clickhouse-driver (>=0.2.0)"] +django = ["django (>=1.8)"] +falcon = ["falcon (>=1.4)"] +fastapi = ["fastapi (>=0.79.0)"] +flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"] +grpcio = ["grpcio (>=1.21.1)"] +httpx = ["httpx (>=0.16.0)"] +huey = ["huey (>=2)"] +loguru = ["loguru (>=0.5)"] +openai = ["openai (>=1.0.0)", "tiktoken (>=0.3.0)"] 
+opentelemetry = ["opentelemetry-distro (>=0.35b0)"] +opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"] +pure-eval = ["asttokens", "executing", "pure-eval"] +pymongo = ["pymongo (>=3.1)"] +pyspark = ["pyspark (>=2.4.4)"] +quart = ["blinker (>=1.1)", "quart (>=0.16.1)"] +rq = ["rq (>=0.6)"] +sanic = ["sanic (>=0.8)"] +sqlalchemy = ["sqlalchemy (>=1.2)"] +starlette = ["starlette (>=0.19.1)"] +starlite = ["starlite (>=1.48)"] +tornado = ["tornado (>=5)"] + [[package]] name = "setuptools" version = "69.5.1" @@ -6644,6 +7017,21 @@ files = [ {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." +optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + [[package]] name = "tzdata" version = "2024.1" @@ -7227,7 +7615,7 @@ duckdb = ["duckdb"] filesystem = [] gcp = [] gs = [] -lancedb = [] +lancedb = ["lancedb"] motherduck = ["duckdb", "pyarrow"] mssql = [] neo4j = ["neo4j"] @@ -7244,4 +7632,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.12" -content-hash = "9609dfc41209efd1fd5e53fd1b77353cb5bce3244ffdb65f81eee686a68b6fc5" +content-hash = "0f66c0ad86b74b152f430a3ed9376bf63154eac83b12b7c3dd96f86af4399079" diff --git a/pyproject.toml b/pyproject.toml index d3d5a2da6..94ceb2e5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,9 @@ tiktoken = "^0.6.0" dspy-ai = "2.4.3" posthog = "^3.5.0" lancedb = "^0.6.10" +importlib-metadata = "6.1.0" +deepeval = "^0.21.36" + [tool.poetry.extras]