fix search, add improvements

This commit is contained in:
Vasilije 2024-04-30 23:14:11 +02:00
parent 84fcf6c5a6
commit 8e850c19f8
16 changed files with 907 additions and 120 deletions

View file

@@ -72,13 +72,13 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction
# - name: Build with Poetry
# run: poetry build
# - name: Install Package
# run: |
# cd dist
# pip install *.whl
# - name: Build with Poetry
# run: poetry build
#
# - name: Install Package
# run: |
# cd dist
# pip install *.whl
# - name: Download NLTK Punkt Tokenizer Models
# run: |

View file

@@ -126,6 +126,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
print(f"Chunk ({chunk_id}) classified.")
print("document_id", document_id)
content_summary = await get_content_summary(input_text)
await add_summary_nodes(graph_client, document_id, content_summary)
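For orientation, an illustrative sketch of what the new summarization step produces, assuming get_content_summary returns a dict with at least the "summary" and "description" keys that add_summary_nodes reads further below:
content_summary = {
    "summary": "One-paragraph summary of the chunk.",  # assumed example value
    "description": "Short description of the summarized content.",  # assumed example value
}
await add_summary_nodes(graph_client, document_id, content_summary)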
@@ -171,16 +173,16 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
if __name__ == "__main__":
async def test():
from cognee.api.v1.add import add
await add(["A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification"], "test")
graph = await cognify()
#
# from cognee.api.v1.add import add
#
# await add(["A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification"], "code")
#
# graph = await cognify()
from cognee.utils import render_graph
await render_graph(graph, include_color=True, include_nodes=True, include_size=True)
await render_graph(graph, include_color=True, include_nodes=False, include_size=False)
import asyncio
asyncio.run(test())

View file

@@ -18,6 +18,9 @@ class SearchType(Enum):
CATEGORIES = 'CATEGORIES'
NEIGHBOR = 'NEIGHBOR'
SUMMARY = 'SUMMARY'
SUMMARY_CLASSIFICATION = 'SUMMARY_CLASSIFICATION'
NODE_CLASSIFICATION = 'NODE_CLASSIFICATION'
DOCUMENT_CLASSIFICATION = 'DOCUMENT_CLASSIFICATION'
@staticmethod
def from_str(name: str):
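As an illustrative sketch (assuming from_str, whose body is truncated in this hunk, resolves a name to its enum member), the new search types could be selected like this:
search_type = SearchType.from_str("SUMMARY_CLASSIFICATION")
assert search_type is SearchType.SUMMARY_CLASSIFICATION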

View file

@@ -41,6 +41,9 @@ class NetworkXAdapter(GraphDBInterface):
) -> None:
self.graph.add_nodes_from(nodes)
await self.save_graph_to_file(self.filename)
async def get_graph(self):
return self.graph
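An illustrative usage sketch, assuming graph_client is an already-initialized NetworkXAdapter: the new accessor hands the underlying networkx graph to callers such as the search functions below.
graph = await graph_client.get_graph()
print(graph.number_of_nodes())  # plain networkx calls work on the returned object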
async def add_edge(
self,

View file

@@ -0,0 +1,2 @@
Choose the category that is most relevant to the query `{{ query }}`
Here are the categories: `{{ categories }}`

View file

@@ -0,0 +1,2 @@
Choose the summary that is most relevant to the query `{{ query }}`
Here are the summaries: `{{ summaries }}`
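For illustration, this template would be filled via render_prompt as in categorize_relevant_summary below; the summaries value here is an assumed example shaped like the list built in search_summary:
enriched_query = render_prompt("categorize_summary.txt", {
    "query": "What is an LLM?",  # illustrative query
    "summaries": [{"document_id": "doc-1", "Summary": "Explains large language models."}],
})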

View file

@@ -7,6 +7,7 @@ async def add_summary_nodes(graph_client, document_id, summary):
summary_node_id,
dict(
name = "Summary",
document_id = document_id,
summary = summary["summary"],
),
)
@@ -20,6 +21,7 @@ async def add_summary_nodes(graph_client, document_id, summary):
description_node_id,
dict(
name = "Description",
document_id= document_id,
description = summary["description"],
),
)

View file

@@ -1,11 +1,19 @@
from typing import Union, Dict
import re
from cognee.modules.search.llm.extraction.categorize_relevant_category import categorize_relevant_category
""" Search categories in the graph and return their summary attributes. """
from cognee.shared.data_models import GraphDBType
from cognee.shared.data_models import GraphDBType, DefaultContentPrediction
import networkx as nx
async def search_categories(graph: Union[nx.Graph, any], query_label: str, infrastructure_config: Dict):
def strip_exact_regex(s, substring):
# Escaping substring to be used in a regex pattern
pattern = re.escape(substring)
# Regex to match the exact substring at the start and end
return re.sub(f"^{pattern}|{pattern}$", "", s)
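A quick worked example of this helper (the document id is illustrative; the "DATA_SUMMARY__" prefix matches the node ids used below):
strip_exact_regex("DATA_SUMMARY__doc-123", "DATA_SUMMARY__")  # -> "doc-123"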
async def search_categories(query: str, graph: Union[nx.Graph, any], query_label: str, infrastructure_config: Dict):
"""
Find the summary node most relevant to the query and return the descriptions of the nodes connected to it.
This function supports both NetworkX graphs and Neo4j graph databases.
@@ -22,8 +30,25 @@ async def search_categories(graph: Union[nx.Graph, any], query_label: str, infra
"""
# Determine which client is in use based on the configuration
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
# Logic for NetworkX
return {node: data.get('content_labels') for node, data in graph.nodes(data=True) if query_label in node and 'content_labels' in data}
categories_and_ids = [
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
for _, data in graph.nodes(data=True)
if 'summary' in data
]
print("summaries_and_ids", categories_and_ids)
check_relevant_category = await categorize_relevant_category(query, categories_and_ids, response_model= infrastructure_config.get_config()["classification_model"])
print("check_relevant_summary", check_relevant_category)
connected_nodes = list(graph.neighbors(check_relevant_category['document_id']))
print("connected_nodes", connected_nodes)
descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes}
print("descs", descriptions)
return descriptions
#
# # Logic for NetworkX
# return {node: data.get('content_labels') for node, data in graph.nodes(data=True) if query_label in node and 'content_labels' in data}
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
# Logic for Neo4j
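An illustrative call sketch matching the new signature above (the query string and graph handle are assumptions; query_label still feeds the Neo4j branch):
descriptions = await search_categories(
    "What is an LLM?",
    await graph_client.get_graph(),
    "SUMMARY",
    infrastructure_config,
)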

View file

@@ -3,9 +3,19 @@
from typing import Union, Dict
import networkx as nx
from cognee.shared.data_models import GraphDBType
from cognee.infrastructure import infrastructure_config
async def search_summary(graph: Union[nx.Graph, any], query: str, infrastructure_config: Dict, other_param: str = None) -> Dict[str, str]:
from cognee.modules.search.llm.extraction.categorize_relevant_summary import categorize_relevant_summary
from cognee.shared.data_models import GraphDBType, ResponseSummaryModel
import re
def strip_exact_regex(s, substring):
# Escaping substring to be used in a regex pattern
pattern = re.escape(substring)
# Regex to match the exact substring at the start and end
return re.sub(f"^{pattern}|{pattern}$", "", s)
async def search_summary(query: str, graph: Union[nx.Graph, any]) -> Dict[str, str]:
"""
Find the summary node most relevant to the query and return the descriptions of the nodes connected to it.
Supports both NetworkX graphs and Neo4j graph databases based on the configuration.
@@ -19,8 +29,24 @@ async def search_summary(graph: Union[nx.Graph, any], query: str, infrastructure
Returns:
- Dict[str, str]: A dictionary mapping the nodes connected to the most relevant summary node to their 'description' attributes.
"""
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
return {node: data.get('summary') for node, data in graph.nodes(data=True) if query in node and 'summary' in data}
print("graph", graph)
summaries_and_ids = [
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
for _, data in graph.nodes(data=True)
if 'summary' in data
]
print("summaries_and_ids", summaries_and_ids)
check_relevant_summary = await categorize_relevant_summary(query, summaries_and_ids, response_model=ResponseSummaryModel)
print("check_relevant_summary", check_relevant_summary)
connected_nodes = list(graph.neighbors(check_relevant_summary['document_id']))
print("connected_nodes", connected_nodes)
descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes}
print("descs", descriptions)
return descriptions
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
cypher_query = f"""
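An illustrative call sketch for the NetworkX branch of the reworked search; the query is an assumption, and the graph handle comes from the get_graph accessor added in this commit:
summary_descriptions = await search_summary("What is an LLM?", await graph_client.get_graph())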

View file

@@ -0,0 +1,17 @@
from typing import Type
from pydantic import BaseModel
from cognee.infrastructure.llm.prompts import render_prompt
from cognee.infrastructure.llm.get_llm_client import get_llm_client
async def categorize_relevant_category(query: str, categories, response_model: Type[BaseModel]):
llm_client = get_llm_client()
enriched_query = render_prompt("categorize_category.txt", {"query": query, "categories": categories})
print("enriched_query", enriched_query)
system_prompt = "Choose the relevant category and return the appropriate output based on the model"
llm_output = await llm_client.acreate_structured_output(enriched_query, system_prompt, response_model)
return llm_output.model_dump()

View file

@@ -0,0 +1,17 @@
from typing import Type
from pydantic import BaseModel
from cognee.infrastructure.llm.prompts import render_prompt
from cognee.infrastructure.llm.get_llm_client import get_llm_client
async def categorize_relevant_summary(query: str, summaries, response_model: Type[BaseModel]):
llm_client = get_llm_client()
enriched_query = render_prompt("categorize_summary.txt", {"query": query, "summaries": summaries})
print("enriched_query", enriched_query)
system_prompt = "Choose the relevant summary and return the appropriate output based on the model"
llm_output = await llm_client.acreate_structured_output(enriched_query, system_prompt, response_model)
return llm_output.model_dump()
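A sketch of how search_summary above consumes this: with ResponseSummaryModel as the response model, model_dump() yields a plain dict (query and summaries_and_ids are assumed to exist as in search_summary):
result = await categorize_relevant_summary(query, summaries_and_ids, ResponseSummaryModel)
relevant_id = result["document_id"]          # id of the best-matching document
relevant_text = result["response_summary"]   # the chosen summary text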

View file

@@ -0,0 +1,17 @@
import logging
from typing import List, Dict
from cognee.infrastructure import infrastructure_config
from .extraction.categorize_relevant_summary import categorize_relevant_summary
logger = logging.getLogger(__name__)
async def get_cognitive_layers(content: str, categories: List[Dict]):
try:
return (await categorize_relevant_summary(
content,
categories[0],
infrastructure_config.get_config()["categorize_summary_model"]
))["cognitive_layers"]
except Exception as error:
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)
raise error

View file

@@ -244,3 +244,10 @@ class DefaultGraphModel(BaseModel):
documents: List[Document] = []
default_fields: Optional[Dict[str, Any]] = {}
default_relationship: Relationship = Relationship(type = "has_properties")
class ResponseSummaryModel(BaseModel):
""" Response summary model and existing document id """
document_id: str
response_summary: str

File diff suppressed because one or more lines are too long

poetry.lock (generated, 536 changed lines)
View file

@@ -215,6 +215,17 @@ doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphin
test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
trio = ["trio (>=0.23)"]
[[package]]
name = "appdirs"
version = "1.4.4"
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
optional = false
python-versions = "*"
files = [
{file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
{file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
]
[[package]]
name = "appnope"
version = "0.1.4"
@@ -508,33 +519,33 @@ lxml = ["lxml"]
[[package]]
name = "black"
version = "24.4.1"
version = "24.4.2"
description = "The uncompromising code formatter."
optional = false
python-versions = ">=3.8"
files = [
{file = "black-24.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f7749fd0d97ff9415975a1432fac7df89bf13c3833cea079e55fa004d5f28c0"},
{file = "black-24.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:859f3cc5d2051adadf8fd504a01e02b0fd866d7549fff54bc9202d524d2e8bd7"},
{file = "black-24.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59271c9c29dfa97f7fda51f56c7809b3f78e72fd8d2205189bbd23022a0618b6"},
{file = "black-24.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:5ed9c34cba223149b5a0144951a0f33d65507cf82c5449cb3c35fe4b515fea9a"},
{file = "black-24.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9dae3ae59d6f2dc93700fd5034a3115434686e66fd6e63d4dcaa48d19880f2b0"},
{file = "black-24.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5f8698974a81af83283eb47644f2711b5261138d6d9180c863fce673cbe04b13"},
{file = "black-24.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f404b6e77043b23d0321fb7772522b876b6de737ad3cb97d6b156638d68ce81"},
{file = "black-24.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:c94e52b766477bdcd010b872ba0714d5458536dc9d0734eff6583ba7266ffd89"},
{file = "black-24.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:962d9e953872cdb83b97bb737ad47244ce2938054dc946685a4cad98520dab38"},
{file = "black-24.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b1d8e3b2486b7dd522b1ab2ba1ec4907f0aa8f5e10a33c4271fb331d1d10b70c"},
{file = "black-24.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed77e214b785148f57e43ca425b6e0850165144aa727d66ac604e56a70bb7825"},
{file = "black-24.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:4ef4764437d7eba8386689cd06e1fb5341ee0ae2e9e22582b21178782de7ed94"},
{file = "black-24.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:92b183f8eef5baf7b20a513abcf982ad616f544f593f6688bb2850d2982911f1"},
{file = "black-24.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:945abd7b3572add997757c94295bb3e73c6ffaf3366b1f26cb2356a4bffd1dc3"},
{file = "black-24.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db5154b9e5b478031371d8bc41ff37b33855fa223a6cfba456c9b73fb96f77d4"},
{file = "black-24.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:afc84c33c1a9aaf3d73140cee776b4ddf73ff429ffe6b7c56dc1c9c10725856d"},
{file = "black-24.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0889f4eb8b3bdf8b189e41a71cf0dbb8141a98346cd1a2695dea5995d416e940"},
{file = "black-24.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5bb0143f175db45a55227eefd63e90849d96c266330ba31719e9667d0d5ec3b9"},
{file = "black-24.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:713a04a78e78f28ef7e8df7a16fe075670ea164860fcef3885e4f3dffc0184b3"},
{file = "black-24.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:171959bc879637a8cdbc53dc3fddae2a83e151937a28cf605fd175ce61e0e94a"},
{file = "black-24.4.1-py3-none-any.whl", hash = "sha256:ecbab810604fe02c70b3a08afd39beb599f7cc9afd13e81f5336014133b4fe35"},
{file = "black-24.4.1.tar.gz", hash = "sha256:5241612dc8cad5b6fd47432b8bd04db80e07cfbc53bb69e9ae18985063bcb8dd"},
{file = "black-24.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce"},
{file = "black-24.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021"},
{file = "black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaea3008c281f1038edb473c1aa8ed8143a5535ff18f978a318f10302b254063"},
{file = "black-24.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7768a0dbf16a39aa5e9a3ded568bb545c8c2727396d063bbaf847df05b08cd96"},
{file = "black-24.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:257d724c2c9b1660f353b36c802ccece186a30accc7742c176d29c146df6e474"},
{file = "black-24.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bdde6f877a18f24844e381d45e9947a49e97933573ac9d4345399be37621e26c"},
{file = "black-24.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e151054aa00bad1f4e1f04919542885f89f5f7d086b8a59e5000e6c616896ffb"},
{file = "black-24.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:7e122b1c4fb252fd85df3ca93578732b4749d9be076593076ef4d07a0233c3e1"},
{file = "black-24.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:accf49e151c8ed2c0cdc528691838afd217c50412534e876a19270fea1e28e2d"},
{file = "black-24.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:88c57dc656038f1ab9f92b3eb5335ee9b021412feaa46330d5eba4e51fe49b04"},
{file = "black-24.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be8bef99eb46d5021bf053114442914baeb3649a89dc5f3a555c88737e5e98fc"},
{file = "black-24.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:415e686e87dbbe6f4cd5ef0fbf764af7b89f9057b97c908742b6008cc554b9c0"},
{file = "black-24.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bf10f7310db693bb62692609b397e8d67257c55f949abde4c67f9cc574492cc7"},
{file = "black-24.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:98e123f1d5cfd42f886624d84464f7756f60ff6eab89ae845210631714f6db94"},
{file = "black-24.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48a85f2cb5e6799a9ef05347b476cce6c182d6c71ee36925a6c194d074336ef8"},
{file = "black-24.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:b1530ae42e9d6d5b670a34db49a94115a64596bc77710b1d05e9801e62ca0a7c"},
{file = "black-24.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:37aae07b029fa0174d39daf02748b379399b909652a806e5708199bd93899da1"},
{file = "black-24.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da33a1a5e49c4122ccdfd56cd021ff1ebc4a1ec4e2d01594fef9b6f267a9e741"},
{file = "black-24.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef703f83fc32e131e9bcc0a5094cfe85599e7109f896fe8bc96cc402f3eb4b6e"},
{file = "black-24.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:b9176b9832e84308818a99a561e90aa479e73c523b3f77afd07913380ae2eab7"},
{file = "black-24.4.2-py3-none-any.whl", hash = "sha256:d36ed1124bb81b32f8614555b34cc4259c3fbc7eec17870e8ff8ded335b58d8c"},
{file = "black-24.4.2.tar.gz", hash = "sha256:c872b53057f000085da66a19c55d68f6f8ddcac2642392ad3a355878406fbd4d"},
]
[package.dependencies]
@@ -572,17 +583,17 @@ css = ["tinycss2 (>=1.1.0,<1.3)"]
[[package]]
name = "boto3"
version = "1.34.91"
version = "1.34.93"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "boto3-1.34.91-py3-none-any.whl", hash = "sha256:97fac686c47647db4b44e4789317e4aeecd38511d71e84f8d20abe33eb630ff1"},
{file = "boto3-1.34.91.tar.gz", hash = "sha256:5077917041adaaae15eeca340289547ef905ca7e11516e9bd22d394fb5057d2a"},
{file = "boto3-1.34.93-py3-none-any.whl", hash = "sha256:b59355bf4a1408563969526f314611dbeacc151cf90ecb22af295dcc4fe18def"},
{file = "boto3-1.34.93.tar.gz", hash = "sha256:e39516e4ca21612932599819662759c04485d53ca457996a913163da11f052a4"},
]
[package.dependencies]
botocore = ">=1.34.91,<1.35.0"
botocore = ">=1.34.93,<1.35.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.10.0,<0.11.0"
@@ -591,13 +602,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
version = "1.34.91"
version = "1.34.93"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.8"
files = [
{file = "botocore-1.34.91-py3-none-any.whl", hash = "sha256:4d1b13f2b1c28ce1743b1e5895ae62bb7e67f892b51882164ea19c27a130852b"},
{file = "botocore-1.34.91.tar.gz", hash = "sha256:93ef7071292a1b2b9fc26537f8ae3a8227da1177969241939ea3fbdb1a1a1d0c"},
{file = "botocore-1.34.93-py3-none-any.whl", hash = "sha256:6fbd5a53a2adc9b3d4ebd90ae0ede83a91a41d96231f8a5984051f75495f246d"},
{file = "botocore-1.34.93.tar.gz", hash = "sha256:79d39b0b87e962991c6dd55e78ce15155099f6fb741be88b1b8a456a702cc150"},
]
[package.dependencies]
@@ -624,13 +635,13 @@ files = [
[[package]]
name = "cairocffi"
version = "1.6.1"
version = "1.7.0"
description = "cffi-based cairo bindings for Python"
optional = false
python-versions = ">=3.7"
python-versions = ">=3.8"
files = [
{file = "cairocffi-1.6.1-py3-none-any.whl", hash = "sha256:aa78ee52b9069d7475eeac457389b6275aa92111895d78fbaa2202a52dac112e"},
{file = "cairocffi-1.6.1.tar.gz", hash = "sha256:78e6bbe47357640c453d0be929fa49cd05cce2e1286f3d2a1ca9cbda7efdb8b7"},
{file = "cairocffi-1.7.0-py3-none-any.whl", hash = "sha256:1f29a8d41dbda4090c0aa33bcdea64f3b493e95f74a43ea107c4a8a7b7f632ef"},
{file = "cairocffi-1.7.0.tar.gz", hash = "sha256:7761863603894305f3160eca68452f373433ca8745ab7dd445bd2c6ce50dcab7"},
]
[package.dependencies]
@@ -638,7 +649,7 @@ cffi = ">=1.1.0"
[package.extras]
doc = ["sphinx", "sphinx_rtd_theme"]
test = ["flake8", "isort", "numpy", "pikepdf", "pytest"]
test = ["numpy", "pikepdf", "pytest", "ruff"]
xcb = ["xcffib (>=1.4.0)"]
[[package]]
@@ -1138,6 +1149,21 @@ files = [
docs = ["ipython", "matplotlib", "numpydoc", "sphinx"]
tests = ["pytest", "pytest-cov", "pytest-xdist"]
[[package]]
name = "dataclasses-json"
version = "0.6.4"
description = "Easily serialize dataclasses to and from JSON."
optional = false
python-versions = ">=3.7,<4.0"
files = [
{file = "dataclasses_json-0.6.4-py3-none-any.whl", hash = "sha256:f90578b8a3177f7552f4e1a6e535e84293cd5da421fcce0642d49c0d7bdf8df2"},
{file = "dataclasses_json-0.6.4.tar.gz", hash = "sha256:73696ebf24936560cca79a2430cbc4f3dd23ac7bf46ed17f38e5e5e7657a6377"},
]
[package.dependencies]
marshmallow = ">=3.18.0,<4.0.0"
typing-inspect = ">=0.4.0,<1"
[[package]]
name = "datasets"
version = "2.14.7"
@@ -1223,6 +1249,41 @@ files = [
{file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
]
[[package]]
name = "deepeval"
version = "0.21.36"
description = "The open-source evaluation framework for LLMs."
optional = false
python-versions = "*"
files = [
{file = "deepeval-0.21.36-py3-none-any.whl", hash = "sha256:e1c9fa0a37c74e79eb1c21a98437d04d4be964e9db0cc7ce46d2ff7c190772e6"},
{file = "deepeval-0.21.36.tar.gz", hash = "sha256:5447512c6e60ec9840c1c11b48f697470b26c718e729e7d48b97887a5db095a3"},
]
[package.dependencies]
docx2txt = ">=0.8,<1.0"
importlib-metadata = ">=6.0.2"
langchain = "*"
langchain-core = "*"
langchain-openai = "*"
portalocker = "*"
protobuf = "4.25.1"
pydantic = "*"
pytest = "*"
pytest-repeat = "*"
pytest-xdist = "*"
ragas = "*"
requests = "*"
rich = "*"
sentry-sdk = "*"
tabulate = "*"
tenacity = ">=8.2.3,<8.3.0"
tqdm = "*"
typer = "*"
[package.extras]
dev = ["black"]
[[package]]
name = "defusedxml"
version = "0.7.1"
@@ -1356,6 +1417,16 @@ files = [
{file = "docstring_parser-0.16.tar.gz", hash = "sha256:538beabd0af1e2db0146b6bd3caa526c35a34d61af9fd2887f3a8a27a739aa6e"},
]
[[package]]
name = "docx2txt"
version = "0.8"
description = "A pure python-based utility to extract text and images from docx files."
optional = false
python-versions = "*"
files = [
{file = "docx2txt-0.8.tar.gz", hash = "sha256:2c06d98d7cfe2d3947e5760a57d924e3ff07745b379c8737723922e7009236e5"},
]
[[package]]
name = "dspy-ai"
version = "2.4.3"
@@ -1478,6 +1549,20 @@ files = [
[package.extras]
test = ["pytest (>=6)"]
[[package]]
name = "execnet"
version = "2.1.1"
description = "execnet: rapid multi-Python deployment"
optional = false
python-versions = ">=3.8"
files = [
{file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"},
{file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"},
]
[package.extras]
testing = ["hatch", "pre-commit", "pytest", "tox"]
[[package]]
name = "executing"
version = "2.0.1"
@@ -2346,22 +2431,22 @@ files = [
[[package]]
name = "importlib-metadata"
version = "7.1.0"
version = "6.1.0"
description = "Read metadata from Python packages"
optional = false
python-versions = ">=3.8"
python-versions = ">=3.7"
files = [
{file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"},
{file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"},
{file = "importlib_metadata-6.1.0-py3-none-any.whl", hash = "sha256:ff80f3b5394912eb1b108fcfd444dc78b7f1f3e16b16188054bd01cb9cb86f09"},
{file = "importlib_metadata-6.1.0.tar.gz", hash = "sha256:43ce9281e097583d758c2c708c4376371261a02c34682491a8e98352365aad20"},
]
[package.dependencies]
zipp = ">=0.5"
[package.extras]
docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
perf = ["ipython"]
testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"]
testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"]
[[package]]
name = "importlib-resources"
@@ -2596,6 +2681,20 @@ files = [
{file = "json5-0.9.25.tar.gz", hash = "sha256:548e41b9be043f9426776f05df8635a00fe06104ea51ed24b67f908856e151ae"},
]
[[package]]
name = "jsonpatch"
version = "1.33"
description = "Apply JSON-Patches (RFC 6902)"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
files = [
{file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"},
{file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"},
]
[package.dependencies]
jsonpointer = ">=1.9"
[[package]]
name = "jsonpath-ng"
version = "1.6.1"
@@ -2804,13 +2903,13 @@ test = ["jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-jupyter[server] (>
[[package]]
name = "jupyterlab"
version = "4.1.6"
version = "4.1.8"
description = "JupyterLab computational environment"
optional = false
python-versions = ">=3.8"
files = [
{file = "jupyterlab-4.1.6-py3-none-any.whl", hash = "sha256:cf3e862bc10dbf4331e4eb37438634f813c238cfc62c71c640b3b3b2caa089a8"},
{file = "jupyterlab-4.1.6.tar.gz", hash = "sha256:7935f36ba26eb615183a4f5c2bbca5791b5108ce2a00b5505f8cfd100d53648e"},
{file = "jupyterlab-4.1.8-py3-none-any.whl", hash = "sha256:c3baf3a2f91f89d110ed5786cd18672b9a357129d4e389d2a0dead15e11a4d2c"},
{file = "jupyterlab-4.1.8.tar.gz", hash = "sha256:3384aded8680e7ce504fd63b8bb89a39df21c9c7694d9e7dc4a68742cdb30f9b"},
]
[package.dependencies]
@@ -2822,7 +2921,7 @@ jinja2 = ">=3.0.3"
jupyter-core = "*"
jupyter-lsp = ">=2.0.0"
jupyter-server = ">=2.4.0,<3"
jupyterlab-server = ">=2.19.0,<3"
jupyterlab-server = ">=2.27.1,<3"
notebook-shim = ">=0.2"
packaging = "*"
tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""}
@@ -3017,17 +3116,17 @@ files = [
[[package]]
name = "lancedb"
version = "0.6.10"
version = "0.6.11"
description = "lancedb"
optional = false
python-versions = ">=3.8"
files = [
{file = "lancedb-0.6.10-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:c7e10eb5f5fdb22452d7678e9bf3df14e2463331868607067aba4ba19a0f4022"},
{file = "lancedb-0.6.10-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:3a1548bf3820ff606e4efce840bd0e979206eb1815915fff9f738c1d8e337293"},
{file = "lancedb-0.6.10-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3874f8aee55c5b4f41b953f4eac28d69c8a9bc10ad992fc7fd2b699a1f4b88ed"},
{file = "lancedb-0.6.10-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:73c0909ac7ae95d8128d25936fc0ff670cf6d9ae1b44e06ec48f3a60081d61ca"},
{file = "lancedb-0.6.10-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:750fb23a6eaa7daea241a124be3970447d45ace1c4f894df197e9869d677b6ba"},
{file = "lancedb-0.6.10-cp38-abi3-win_amd64.whl", hash = "sha256:06a299d91e1d9d2663fea7086efc784bf8643f51ff1532617c4910ec63cd2004"},
{file = "lancedb-0.6.11-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:79dbc2a79dac7b843e328a3b7eecb1b42f38ad524742111a38efbeaa1f58ddfe"},
{file = "lancedb-0.6.11-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:bfc6661e1fe5dd75346b4a1980243b4ccecd2b0ad991f8becc6e506a608c2d8a"},
{file = "lancedb-0.6.11-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0998c2dae676c24b95d492358f80ef6e5100b86335b96bf402af3f28f7ec4f3b"},
{file = "lancedb-0.6.11-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:b6b289599c4c6a61a70da89d4c3201abec7483d0e03573506014af2dcddfe0c4"},
{file = "lancedb-0.6.11-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:d789d5a181c4bc42650567cb63ce2772ed902046b08e8e401970df284a4df1c1"},
{file = "lancedb-0.6.11-cp38-abi3-win_amd64.whl", hash = "sha256:ea12fc94545afb03933d080cce593491e593ab95cbfddf05ab7183bce2bc8d62"},
]
[package.dependencies]
@@ -3051,6 +3150,143 @@ docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
embeddings = ["awscli (>=1.29.57)", "boto3 (>=1.28.57)", "botocore (>=1.31.57)", "cohere", "google-generativeai", "huggingface-hub", "instructorembedding", "open-clip-torch", "openai (>=1.6.1)", "pillow", "sentence-transformers", "torch"]
tests = ["aiohttp", "boto3", "duckdb", "pandas (>=1.4)", "polars (>=0.19)", "pytest", "pytest-asyncio", "pytest-mock", "pytz", "tantivy"]
[[package]]
name = "langchain"
version = "0.1.10"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain-0.1.10-py3-none-any.whl", hash = "sha256:dcc1c0968b8d946a812155584ecbbeda690c930c3ee27bb5ecc113d954f6cf1a"},
{file = "langchain-0.1.10.tar.gz", hash = "sha256:17951bcd6d74adc74aa081f260ef5514c449488815314420b7e0f8349f15d932"},
]
[package.dependencies]
aiohttp = ">=3.8.3,<4.0.0"
async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
dataclasses-json = ">=0.5.7,<0.7"
jsonpatch = ">=1.33,<2.0"
langchain-community = ">=0.0.25,<0.1"
langchain-core = ">=0.1.28,<0.2"
langchain-text-splitters = ">=0.0.1,<0.1"
langsmith = ">=0.1.0,<0.2.0"
numpy = ">=1,<2"
pydantic = ">=1,<3"
PyYAML = ">=5.3"
requests = ">=2,<3"
SQLAlchemy = ">=1.4,<3"
tenacity = ">=8.1.0,<9.0.0"
[package.extras]
azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-textanalytics (>=5.3.0,<6.0.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0b8)", "openai (<2)"]
clarifai = ["clarifai (>=9.1.0)"]
cli = ["typer (>=0.9.0,<0.10.0)"]
cohere = ["cohere (>=4,<5)"]
docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
embeddings = ["sentence-transformers (>=2,<3)"]
extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "langchain-openai (>=0.0.2,<0.1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
javascript = ["esprima (>=4.0.1,<5.0.0)"]
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"]
qdrant = ["qdrant-client (>=1.3.1,<2.0.0)"]
text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
[[package]]
name = "langchain-community"
version = "0.0.34"
description = "Community contributed LangChain integrations."
optional = false
python-versions = "<4.0,>=3.8.1"
files = [
{file = "langchain_community-0.0.34-py3-none-any.whl", hash = "sha256:bc13b21a44bbfca01bff8b35c10a26d71485b57c1d284f499b577ba6e1a5d84a"},
{file = "langchain_community-0.0.34.tar.gz", hash = "sha256:96e9a807d9b4777820df5a970996f6bf3ad5632137bf0f4d863bd832bdeb2b0f"},
]
[package.dependencies]
aiohttp = ">=3.8.3,<4.0.0"
dataclasses-json = ">=0.5.7,<0.7"
langchain-core = ">=0.1.45,<0.2.0"
langsmith = ">=0.1.0,<0.2.0"
numpy = ">=1,<2"
PyYAML = ">=5.3"
requests = ">=2,<3"
SQLAlchemy = ">=1.4,<3"
tenacity = ">=8.1.0,<9.0.0"
[package.extras]
cli = ["typer (>=0.9.0,<0.10.0)"]
extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cloudpickle (>=2.0.0)", "cohere (>=4,<5)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "friendli-client (>=1.2.4,<2.0.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "httpx-sse (>=0.4.0,<0.5.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.3,<6.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "nvidia-riva-client (>=2.14.0,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "premai (>=0.3.25,<0.4.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pyjwt (>=2.8.0,<3.0.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "tidb-vector (>=0.0.3,<1.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "tree-sitter (>=0.20.2,<0.21.0)", "tree-sitter-languages (>=1.8.0,<2.0.0)", "upstash-redis (>=0.15.0,<0.16.0)", "vdms (>=0.0.20,<0.0.21)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
[[package]]
name = "langchain-core"
version = "0.1.46"
description = "Building applications with LLMs through composability"
optional = false
python-versions = "<4.0,>=3.8.1"
files = [
{file = "langchain_core-0.1.46-py3-none-any.whl", hash = "sha256:1c0befcd2665dd4aa153318aa9bf729071644b4c179e491769b8e583b4bf7441"},
{file = "langchain_core-0.1.46.tar.gz", hash = "sha256:17c416349f5c7a9808e70e3725749a3a2df5088f1ecca045c883871aa95f9c9e"},
]
[package.dependencies]
jsonpatch = ">=1.33,<2.0"
langsmith = ">=0.1.0,<0.2.0"
packaging = ">=23.2,<24.0"
pydantic = ">=1,<3"
PyYAML = ">=5.3"
tenacity = ">=8.1.0,<9.0.0"
[package.extras]
extended-testing = ["jinja2 (>=3,<4)"]
[[package]]
name = "langchain-openai"
version = "0.1.4"
description = "An integration package connecting OpenAI and LangChain"
optional = false
python-versions = "<4.0,>=3.8.1"
files = [
{file = "langchain_openai-0.1.4-py3-none-any.whl", hash = "sha256:a349ada8724921e380aab03ee312568f5ca99adbc806f6878d79ff9cd1d6d353"},
{file = "langchain_openai-0.1.4.tar.gz", hash = "sha256:1a3220464c270d73ea3987010617789adc2099d4d4740b15c7734ab07e1f054b"},
]
[package.dependencies]
langchain-core = ">=0.1.46,<0.2.0"
openai = ">=1.10.0,<2.0.0"
tiktoken = ">=0.5.2,<1"
[[package]]
name = "langchain-text-splitters"
version = "0.0.1"
description = "LangChain text splitting utilities"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langchain_text_splitters-0.0.1-py3-none-any.whl", hash = "sha256:f5b802f873f5ff6a8b9259ff34d53ed989666ef4e1582e6d1adb3b5520e3839a"},
{file = "langchain_text_splitters-0.0.1.tar.gz", hash = "sha256:ac459fa98799f5117ad5425a9330b21961321e30bc19a2a2f9f761ddadd62aa1"},
]
[package.dependencies]
langchain-core = ">=0.1.28,<0.2.0"
[package.extras]
extended-testing = ["lxml (>=5.1.0,<6.0.0)"]
[[package]]
name = "langsmith"
version = "0.1.5"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langsmith-0.1.5-py3-none-any.whl", hash = "sha256:a1811821a923d90e53bcbacdd0988c3c366aff8f4c120d8777e7af8ecda06268"},
{file = "langsmith-0.1.5.tar.gz", hash = "sha256:aa7a2861aa3d9ae563a077c622953533800466c4e2e539b0d567b84d5fd5b157"},
]
[package.dependencies]
pydantic = ">=1,<3"
requests = ">=2,<3"
[[package]]
name = "loguru"
version = "0.7.2"
@@ -3210,6 +3446,25 @@ files = [
{file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"},
]
[[package]]
name = "marshmallow"
version = "3.21.1"
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
optional = false
python-versions = ">=3.8"
files = [
{file = "marshmallow-3.21.1-py3-none-any.whl", hash = "sha256:f085493f79efb0644f270a9bf2892843142d80d7174bbbd2f3713f2a589dc633"},
{file = "marshmallow-3.21.1.tar.gz", hash = "sha256:4e65e9e0d80fc9e609574b9983cf32579f305c718afb30d7233ab818571768c3"},
]
[package.dependencies]
packaging = ">=17.0"
[package.extras]
dev = ["marshmallow[tests]", "pre-commit (>=3.5,<4.0)", "tox"]
docs = ["alabaster (==0.7.16)", "autodocsumm (==0.2.12)", "sphinx (==7.2.6)", "sphinx-issues (==4.0.0)", "sphinx-version-warning (==1.1.2)"]
tests = ["pytest", "pytz", "simplejson"]
[[package]]
name = "matplotlib"
version = "3.8.4"
@@ -3832,12 +4087,12 @@ test = ["pep440", "pre-commit", "pytest", "testpath"]
[[package]]
name = "neo4j"
version = "5.19.0"
version = "5.20.0"
description = "Neo4j Bolt driver for Python"
optional = false
python-versions = ">=3.7"
files = [
{file = "neo4j-5.19.0.tar.gz", hash = "sha256:23704f604214174f3b7d15a38653a1462809986019dfdaf773ff7ca4e1b9e2de"},
{file = "neo4j-5.20.0.tar.gz", hash = "sha256:c59e54a0c0fa1f109f1d2fa5293c29c2bb30ba388b4f9dd9656919793c10063a"},
]
[package.dependencies]
@@ -4198,13 +4453,13 @@ files = [
[[package]]
name = "packaging"
version = "24.0"
version = "23.2"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.7"
files = [
{file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"},
{file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
{file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
{file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
]
[[package]]
@@ -4665,22 +4920,22 @@ wcwidth = "*"
[[package]]
name = "protobuf"
version = "4.25.3"
version = "4.25.1"
description = ""
optional = false
python-versions = ">=3.8"
files = [
{file = "protobuf-4.25.3-cp310-abi3-win32.whl", hash = "sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa"},
{file = "protobuf-4.25.3-cp310-abi3-win_amd64.whl", hash = "sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8"},
{file = "protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c"},
{file = "protobuf-4.25.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019"},
{file = "protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d"},
{file = "protobuf-4.25.3-cp38-cp38-win32.whl", hash = "sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2"},
{file = "protobuf-4.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4"},
{file = "protobuf-4.25.3-cp39-cp39-win32.whl", hash = "sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4"},
{file = "protobuf-4.25.3-cp39-cp39-win_amd64.whl", hash = "sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c"},
{file = "protobuf-4.25.3-py3-none-any.whl", hash = "sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9"},
{file = "protobuf-4.25.3.tar.gz", hash = "sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"},
{file = "protobuf-4.25.1-cp310-abi3-win32.whl", hash = "sha256:193f50a6ab78a970c9b4f148e7c750cfde64f59815e86f686c22e26b4fe01ce7"},
{file = "protobuf-4.25.1-cp310-abi3-win_amd64.whl", hash = "sha256:3497c1af9f2526962f09329fd61a36566305e6c72da2590ae0d7d1322818843b"},
{file = "protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:0bf384e75b92c42830c0a679b0cd4d6e2b36ae0cf3dbb1e1dfdda48a244f4bcd"},
{file = "protobuf-4.25.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:0f881b589ff449bf0b931a711926e9ddaad3b35089cc039ce1af50b21a4ae8cb"},
{file = "protobuf-4.25.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:ca37bf6a6d0046272c152eea90d2e4ef34593aaa32e8873fc14c16440f22d4b7"},
{file = "protobuf-4.25.1-cp38-cp38-win32.whl", hash = "sha256:abc0525ae2689a8000837729eef7883b9391cd6aa7950249dcf5a4ede230d5dd"},
{file = "protobuf-4.25.1-cp38-cp38-win_amd64.whl", hash = "sha256:1484f9e692091450e7edf418c939e15bfc8fc68856e36ce399aed6889dae8bb0"},
{file = "protobuf-4.25.1-cp39-cp39-win32.whl", hash = "sha256:8bdbeaddaac52d15c6dce38c71b03038ef7772b977847eb6d374fc86636fa510"},
{file = "protobuf-4.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:becc576b7e6b553d22cbdf418686ee4daa443d7217999125c045ad56322dda10"},
{file = "protobuf-4.25.1-py3-none-any.whl", hash = "sha256:a19731d5e83ae4737bb2a089605e636077ac001d18781b3cf489b9546c7c80d6"},
{file = "protobuf-4.25.1.tar.gz", hash = "sha256:57d65074b4f5baa4ab5da1605c02be90ac20c8b40fb137d6a8df9f416b0d0ce2"},
]
[[package]]
@@ -4999,13 +5254,13 @@ testutils = ["gitpython (>3)"]
[[package]]
name = "pymdown-extensions"
version = "10.8"
version = "10.8.1"
description = "Extension pack for Python Markdown."
optional = false
python-versions = ">=3.8"
files = [
{file = "pymdown_extensions-10.8-py3-none-any.whl", hash = "sha256:3539003ff0d5e219ba979d2dc961d18fcad5ac259e66c764482e8347b4c0503c"},
{file = "pymdown_extensions-10.8.tar.gz", hash = "sha256:91ca336caf414e1e5e0626feca86e145de9f85a3921a7bcbd32890b51738c428"},
{file = "pymdown_extensions-10.8.1-py3-none-any.whl", hash = "sha256:f938326115884f48c6059c67377c46cf631c733ef3629b6eed1349989d1b30cb"},
{file = "pymdown_extensions-10.8.1.tar.gz", hash = "sha256:3ab1db5c9e21728dabf75192d71471f8e50f216627e9a1fa9535ecb0231b9940"},
]
[package.dependencies]
@@ -5061,6 +5316,16 @@ files = [
{file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"},
]
[[package]]
name = "pysbd"
version = "0.3.4"
description = "pysbd (Python Sentence Boundary Disambiguation) is a rule-based sentence boundary detection that works out-of-the-box across many languages."
optional = false
python-versions = ">=3"
files = [
{file = "pysbd-0.3.4-py3-none-any.whl", hash = "sha256:cd838939b7b0b185fcf86b0baf6636667dfb6e474743beeff878e9f42e022953"},
]
[[package]]
name = "pytest"
version = "7.4.4"
@@ -5117,6 +5382,40 @@ black = ">=23"
pytest = ">=7"
ruff = ">=0.0.258"
[[package]]
name = "pytest-repeat"
version = "0.9.3"
description = "pytest plugin for repeating tests"
optional = false
python-versions = ">=3.7"
files = [
{file = "pytest_repeat-0.9.3-py3-none-any.whl", hash = "sha256:26ab2df18226af9d5ce441c858f273121e92ff55f5bb311d25755b8d7abdd8ed"},
{file = "pytest_repeat-0.9.3.tar.gz", hash = "sha256:ffd3836dfcd67bb270bec648b330e20be37d2966448c4148c4092d1e8aba8185"},
]
[package.dependencies]
pytest = "*"
[[package]]
name = "pytest-xdist"
version = "3.5.0"
description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs"
optional = false
python-versions = ">=3.7"
files = [
{file = "pytest-xdist-3.5.0.tar.gz", hash = "sha256:cbb36f3d67e0c478baa57fa4edc8843887e0f6cfc42d677530a36d7472b32d8a"},
{file = "pytest_xdist-3.5.0-py3-none-any.whl", hash = "sha256:d075629c7e00b611df89f490a5063944bee7a4362a5ff11c7cc7824a03dfce24"},
]
[package.dependencies]
execnet = ">=1.1"
pytest = ">=6.2.0"
[package.extras]
psutil = ["psutil (>=3.0)"]
setproctitle = ["setproctitle"]
testing = ["filelock"]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@@ -5401,6 +5700,33 @@ urllib3 = ">=1.26.14,<3"
[package.extras]
fastembed = ["fastembed (==0.2.6)"]
[[package]]
name = "ragas"
version = "0.1.7"
description = ""
optional = false
python-versions = "*"
files = [
{file = "ragas-0.1.7-py3-none-any.whl", hash = "sha256:abe02b40a8d11842c42e222226901287858beb70203f1227a403a9261d0bb684"},
{file = "ragas-0.1.7.tar.gz", hash = "sha256:db857262dda63fc01a7eef837cbba166202084b5d535b2e8ad408c63a66f9319"},
]
[package.dependencies]
appdirs = "*"
datasets = "*"
langchain = "*"
langchain-community = "*"
langchain-core = "*"
langchain-openai = "*"
nest-asyncio = "*"
numpy = "*"
openai = ">1"
pysbd = ">=0.3.4"
tiktoken = "*"
[package.extras]
all = ["sentence-transformers"]
[[package]]
name = "ratelimiter"
version = "1.2.0.post0"
@@ -5905,6 +6231,53 @@ nativelib = ["pyobjc-framework-Cocoa", "pywin32"]
objc = ["pyobjc-framework-Cocoa"]
win32 = ["pywin32"]
[[package]]
name = "sentry-sdk"
version = "2.0.1"
description = "Python client for Sentry (https://sentry.io)"
optional = false
python-versions = ">=3.6"
files = [
{file = "sentry_sdk-2.0.1-py2.py3-none-any.whl", hash = "sha256:b54c54a2160f509cf2757260d0cf3885b608c6192c2555a3857e3a4d0f84bdb3"},
{file = "sentry_sdk-2.0.1.tar.gz", hash = "sha256:c278e0f523f6f0ee69dc43ad26dcdb1202dffe5ac326ae31472e012d941bee21"},
]
[package.dependencies]
certifi = "*"
urllib3 = ">=1.26.11"
[package.extras]
aiohttp = ["aiohttp (>=3.5)"]
arq = ["arq (>=0.23)"]
asyncpg = ["asyncpg (>=0.23)"]
beam = ["apache-beam (>=2.12)"]
bottle = ["bottle (>=0.12.13)"]
celery = ["celery (>=3)"]
celery-redbeat = ["celery-redbeat (>=2)"]
chalice = ["chalice (>=1.16.0)"]
clickhouse-driver = ["clickhouse-driver (>=0.2.0)"]
django = ["django (>=1.8)"]
falcon = ["falcon (>=1.4)"]
fastapi = ["fastapi (>=0.79.0)"]
flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"]
grpcio = ["grpcio (>=1.21.1)"]
httpx = ["httpx (>=0.16.0)"]
huey = ["huey (>=2)"]
loguru = ["loguru (>=0.5)"]
openai = ["openai (>=1.0.0)", "tiktoken (>=0.3.0)"]
opentelemetry = ["opentelemetry-distro (>=0.35b0)"]
opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"]
pure-eval = ["asttokens", "executing", "pure-eval"]
pymongo = ["pymongo (>=3.1)"]
pyspark = ["pyspark (>=2.4.4)"]
quart = ["blinker (>=1.1)", "quart (>=0.16.1)"]
rq = ["rq (>=0.6)"]
sanic = ["sanic (>=0.8)"]
sqlalchemy = ["sqlalchemy (>=1.2)"]
starlette = ["starlette (>=0.19.1)"]
starlite = ["starlite (>=1.48)"]
tornado = ["tornado (>=5)"]
[[package]]
name = "setuptools"
version = "69.5.1"
@@ -6644,6 +7017,21 @@ files = [
{file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"},
]
[[package]]
name = "typing-inspect"
version = "0.9.0"
description = "Runtime inspection utilities for typing module."
optional = false
python-versions = "*"
files = [
{file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"},
{file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"},
]
[package.dependencies]
mypy-extensions = ">=0.3.0"
typing-extensions = ">=3.7.4"
[[package]]
name = "tzdata"
version = "2024.1"
@@ -7227,7 +7615,7 @@ duckdb = ["duckdb"]
filesystem = []
gcp = []
gs = []
lancedb = []
lancedb = ["lancedb"]
motherduck = ["duckdb", "pyarrow"]
mssql = []
neo4j = ["neo4j"]
@@ -7244,4 +7632,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
content-hash = "9609dfc41209efd1fd5e53fd1b77353cb5bce3244ffdb65f81eee686a68b6fc5"
content-hash = "0f66c0ad86b74b152f430a3ed9376bf63154eac83b12b7c3dd96f86af4399079"

View file

@@ -63,6 +63,9 @@ tiktoken = "^0.6.0"
dspy-ai = "2.4.3"
posthog = "^3.5.0"
lancedb = "^0.6.10"
importlib-metadata = "6.1.0"
deepeval = "^0.21.36"
[tool.poetry.extras]