Merge branch 'main' of github.com:topoteretes/cognee into COG-334-structure-routing

Commit ddf495266b by Igor Ilic, 2024-11-05 22:53:33 +01:00
30 changed files with 1661 additions and 1329 deletions

.github/workflows/auto-comment.yml (new file)

@@ -0,0 +1,81 @@
name: Issue and PR Auto Comments
on:
issues:
types:
- opened
- closed
- assigned
pull_request_target:
types:
- opened
- closed
permissions:
contents: read
jobs:
auto-comment:
permissions:
issues: write
pull-requests: write
runs-on: ubuntu-latest
steps:
# configuration for auto-comment actions
- name: Configure Auto Comments
uses: wow-actions/auto-comment@v1
with:
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
issuesOpened: |
👋 @{{ author }}
Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible.
To help us address your issue efficiently, please ensure you have provided:
- A clear description of the problem
- Steps to reproduce (if applicable)
- Expected vs actual behavior
- Any relevant screenshots or error messages
Our team typically responds within 2-3 business days.
issuesClosed: |
✅ @{{ author }}
This issue has been closed. If you have any further questions or if the issue resurfaces,
please feel free to:
- Add a comment to this thread
- Open a new issue with reference to this one
Thank you for helping us improve!
pullRequestOpened: |
👍 @{{ author }}
Thank you for your pull request and contributing to our community!
Please ensure you have:
- [ ] Followed our contributing guidelines
- [ ] Added/updated tests (if applicable)
- [ ] Updated documentation (if applicable)
- [ ] Added a descriptive PR title
Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance.
# Separate action for merged PRs
- name: Handle Merged Pull Requests
if: github.event.pull_request.merged == true
uses: actions-cool/pr-welcome@v1.2.1
with:
token: ${{ secrets.GH_TOKEN }}
comment: |
🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉
Your pull request has been merged successfully. Thank you for your valuable contribution!
We appreciate the time and effort you've put into improving our project.
Your changes will be included in our next release.
Keep up the great work! 💪
emoji: 'rocket'
pr-emoji: '+1, heart, rocket'


@@ -20,10 +20,16 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Build Docker images
env:
ENVIRONMENT: dev
ENV: dev
run: |
docker compose -f docker-compose.yml build
- name: Run Docker Compose
env:
ENVIRONMENT: dev
ENV: dev
run: |
docker compose -f docker-compose.yml up -d

.github/workflows/test_notebook.yml (new file)

@@ -0,0 +1,61 @@
name: test | notebook
on:
pull_request:
branches:
- main
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
RUNTIME__LOG_LEVEL: ERROR
jobs:
get_docs_changes:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
run_notebook_test:
name: test
needs: get_docs_changes
if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
runs-on: ubuntu-latest
defaults:
run:
shell: bash
steps:
- name: Check out
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.11.x'
- name: Install Poetry
uses: snok/install-poetry@v1.3.2
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Install dependencies
run: |
poetry install --no-interaction
poetry add jupyter --no-interaction
- name: Execute Jupyter Notebook
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
run: |
poetry run jupyter nbconvert \
--to notebook \
--execute notebooks/cognee_demo.ipynb \
--output executed_notebook.ipynb \
--ExecutePreprocessor.timeout=1200
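For local debugging, the same execution step can be reproduced without the workflow runner via nbconvert's Python API. A minimal sketch, assuming `jupyter` is installed (as the workflow does with `poetry add jupyter`) and the same environment variables are exported:

```python
# Sketch: local reproduction of the CI notebook-execution step above.
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

nb = nbformat.read("notebooks/cognee_demo.ipynb", as_version=4)
executor = ExecutePreprocessor(timeout=1200, kernel_name="python3")
executor.preprocess(nb, {"metadata": {"path": "notebooks/"}})  # execute all cells in order
nbformat.write(nb, "executed_notebook.ipynb")
```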


@@ -25,7 +25,7 @@ RUN pip install poetry
RUN poetry config virtualenvs.create false
# Install the dependencies
RUN poetry install --no-root --no-dev
RUN poetry install --all-extras --no-root --no-dev
# Set the PYTHONPATH environment variable to include the /app directory


@@ -29,6 +29,10 @@ If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord
pip install cognee
```
### With pip with PostgreSQL support
```bash
pip install cognee[postgres]
```
### With poetry
@@ -36,6 +40,11 @@ pip install cognee
poetry add cognee
```
### With poetry with PostgreSQL support
```bash
poetry add cognee -E postgres
```
## 💻 Basic Usage
@@ -50,7 +59,7 @@ os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY"
or
```python
import cognee
cognee.config.llm_api_key = "YOUR_OPENAI_API_KEY"
cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY")
```
You can also set the variables by creating a .env file; here is our <a href="https://github.com/topoteretes/cognee/blob/main/.env.template">template</a>.
To use different LLM providers, check out our <a href="https://topoteretes.github.io/cognee">documentation</a> for more info.
@@ -73,26 +82,54 @@ docker-compose up
```
Then navigate to localhost:3000
If you want to use the UI with PostgreSQL through docker-compose, make sure to set the following values in the .env file:
```
DB_PROVIDER=postgres
DB_HOST=postgres
DB_PORT=5432
DB_NAME=cognee_db
DB_USERNAME=cognee
DB_PASSWORD=cognee
```
### Simple example
Run the default cognee pipeline:
First, copy `.env.template` to `.env` and add your OpenAI API key to the LLM_API_KEY field.
Optionally, set `VECTOR_DB_PROVIDER="lancedb"` in `.env` to simplify setup.
This script will run the default pipeline:
```python
import cognee
import asyncio
from cognee.api.v1.search import SearchType
text = """Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval"""
async def main():
await cognee.prune.prune_data() # Reset cognee data
await cognee.prune.prune_system(metadata=True) # Reset cognee system state
await cognee.add(text) # Add a new piece of information
text = """
Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval.
"""
await cognee.cognify() # Use LLMs and cognee to create a knowledge graph
await cognee.add(text) # Add text to cognee
await cognee.cognify() # Use LLMs and cognee to create knowledge graph
search_results = await cognee.search("INSIGHTS", {'query': 'NLP'}) # Query cognee for the insights
search_results = await cognee.search( # Search cognee for insights
SearchType.INSIGHTS,
{'query': 'Tell me about NLP'}
)
for result in search_results:
do_something_with_result(result)
for result_text in search_results: # Display results
print(result_text)
asyncio.run(main())
```
A version of this example is available here: `examples/python/simple_example.py`
### Create your own memory store


@@ -5,6 +5,7 @@ from cognee.modules.cognify.config import get_cognify_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.databases.relational import get_relational_config
from cognee.infrastructure.files.storage import LocalStorage
@@ -55,19 +56,36 @@ class config():
graph_config.graph_database_provider = graph_database_provider
@staticmethod
def llm_provider(llm_provider: str):
graph_config = get_graph_config()
graph_config.llm_provider = llm_provider
def set_llm_provider(llm_provider: str):
llm_config = get_llm_config()
llm_config.llm_provider = llm_provider
@staticmethod
def llm_endpoint(llm_endpoint: str):
graph_config = get_graph_config()
graph_config.llm_endpoint = llm_endpoint
def set_llm_endpoint(llm_endpoint: str):
llm_config = get_llm_config()
llm_config.llm_endpoint = llm_endpoint
@staticmethod
def llm_model(llm_model: str):
graph_config = get_graph_config()
graph_config.llm_model = llm_model
def set_llm_model(llm_model: str):
llm_config = get_llm_config()
llm_config.llm_model = llm_model
@staticmethod
def set_llm_api_key(llm_api_key: str):
llm_config = get_llm_config()
llm_config.llm_api_key = llm_api_key
@staticmethod
def set_llm_config(config_dict: dict):
"""
Updates the llm config with values from config_dict.
"""
llm_config = get_llm_config()
for key, value in config_dict.items():
if hasattr(llm_config, key):
object.__setattr__(llm_config, key, value)
else:
raise AttributeError(f"'{key}' is not a valid attribute of the config.")
@staticmethod
def set_chunk_strategy(chunk_strategy: object):
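A usage sketch for the new `set_llm_config` helper: the keys mirror the `llm_config` attributes assigned by the individual setters above, and the values are placeholders, not recommendations:

```python
import cognee

# Update several LLM settings in one call. Keys must match attributes on
# the llm config object; unknown keys raise AttributeError (see above).
cognee.config.set_llm_config({
    "llm_provider": "openai",          # placeholder values
    "llm_model": "gpt-4o-mini",
    "llm_api_key": "YOUR_OPENAI_API_KEY",
})
```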
@@ -137,5 +155,5 @@ class config():
if "username" not in graphistry_config or "password" not in graphistry_config:
raise ValueError("graphistry_config dictionary must contain 'username' and 'password' keys.")
base_config.graphistry_username = graphistry_config.username
base_config.graphistry_password = graphistry_config.password
base_config.graphistry_username = graphistry_config.get("username")
base_config.graphistry_password = graphistry_config.get("password")
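This fixes a genuine bug: `graphistry_config` is a plain dict, so attribute access raises `AttributeError`, and `dict.get` is the appropriate lookup. In two lines:

```python
graphistry_config = {"username": "user", "password": "pass"}
print(graphistry_config.get("username"))  # "user" -- dict lookup works
# graphistry_config.username              # AttributeError: 'dict' object has no attribute 'username'
```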


@@ -1,25 +0,0 @@
from typing import List, Optional
from fastembed import TextEmbedding
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
class FastembedEmbeddingEngine(EmbeddingEngine):
embedding_model: str
embedding_dimensions: int
def __init__(
self,
embedding_model: Optional[str] = "BAAI/bge-large-en-v1.5",
embedding_dimensions: Optional[int] = 1024,
):
self.embedding_model = embedding_model
self.embedding_dimensions = embedding_dimensions
async def embed_text(self, text: List[str]) -> List[float]:
embedding_model = TextEmbedding(model_name = self.embedding_model, cache_dir = get_absolute_path("cache/embeddings"))
embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text)))
return embeddings_list
def get_vector_size(self) -> int:
return self.embedding_dimensions


@@ -164,7 +164,10 @@ class LanceDBAdapter(VectorDBInterface):
async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
connection = await self.get_connection()
collection = await connection.open_table(collection_name)
results = await collection.delete(f"id IN {tuple(data_point_ids)}")
if len(data_point_ids) == 1:
results = await collection.delete(f"id = '{data_point_ids[0]}'")
else:
results = await collection.delete(f"id IN {tuple(data_point_ids)}")
return results
async def prune(self):
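The single-id branch is needed because Python formats a one-element tuple with a trailing comma, which is invalid in the SQL-style filter LanceDB parses. A quick illustration:

```python
ids = ["a1"]
print(f"id IN {tuple(ids)}")  # id IN ('a1',)  -- trailing comma, invalid filter
print(f"id = '{ids[0]}'")     # id = 'a1'      -- valid for a single id

ids = ["a1", "b2"]
print(f"id IN {tuple(ids)}")  # id IN ('a1', 'b2')  -- valid
```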


@@ -1,7 +1,8 @@
from typing import BinaryIO
from pypdf import PdfReader
import filetype
def extract_text_from_file(file: BinaryIO, file_type) -> str:
def extract_text_from_file(file: BinaryIO, file_type: filetype.Type) -> str:
"""Extract text from a file"""
if file_type.extension == "pdf":
reader = PdfReader(stream = file)
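The new annotation comes from the `filetype` package, whose `guess()` helper inspects a file's magic bytes and returns a `filetype.Type` with `extension` and `mime` attributes. A short sketch (the path is illustrative):

```python
import filetype

kind = filetype.guess("document.pdf")  # illustrative path; also accepts bytes or a file object
if kind is not None:
    print(kind.extension)  # "pdf"
    print(kind.mime)       # "application/pdf"
```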


@@ -1,5 +0,0 @@
import os
def get_file_size(file_path: str):
"""Get the size of a file"""
return os.path.getsize(file_path)


@@ -1,4 +1,3 @@
import dsp
import dspy
from dspy.evaluate.evaluate import Evaluate
from dspy.primitives.example import Example


@@ -1,4 +1,3 @@
import dsp
import dspy
from dspy.teleprompt import BootstrapFewShot
from dspy.primitives.example import Example


@@ -5,7 +5,7 @@ from .models.Task import Task
class PipelineConfig(BaseModel):
batch_count: int = 10
description: Optional[str]
description: Optional[str] = None
class Pipeline():
id: UUID = uuid4()


@@ -1,8 +1,8 @@
from typing import Any, Callable, Generator
from typing import Any, Callable, Generator, List
import asyncio
from ..tasks.Task import Task
def run_tasks_parallel(tasks: [Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
def run_tasks_parallel(tasks: List[Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
async def parallel_run(*args, **kwargs):
parallel_tasks = [asyncio.create_task(task.run(*args, **kwargs)) for task in tasks]
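The old `[Task]` is a list literal, not a type expression, so static checkers reject it; `List[Task]` (or the builtin `list[Task]` on Python 3.9+) is the idiomatic annotation. A minimal contrast:

```python
from typing import List

def broken(tasks: [int]) -> None:     # evaluates at import time, but type checkers
    pass                              # reject a list literal used as a type

def fixed(tasks: List[int]) -> None:  # the form the change above adopts
    pass
```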


@@ -18,7 +18,7 @@ class Directory(BaseModel):
directories: List['Directory'] = []
# Allows recursive Directory Model
Directory.update_forward_refs()
Directory.model_rebuild()
class RepositoryProperties(BaseModel):
custom_properties: Optional[Dict[str, Any]] = None
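`update_forward_refs()` was the Pydantic v1 way to resolve self-references; v2 renames it to `model_rebuild()`. A minimal sketch of the recursive-model pattern used here:

```python
from typing import List
from pydantic import BaseModel

class Directory(BaseModel):
    name: str
    directories: List["Directory"] = []  # self-referencing field

Directory.model_rebuild()  # Pydantic v2 replacement for update_forward_refs()

root = Directory(name="root", directories=[{"name": "child"}])
print(root.directories[0].name)  # child
```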


@@ -6,15 +6,15 @@ class BaseClass(BaseModel):
name: str
type: Literal["Class"] = "Class"
description: str
constructor_parameters: Optional[List[str]]
constructor_parameters: Optional[List[str]] = None
class Class(BaseModel):
id: str
name: str
type: Literal["Class"] = "Class"
description: str
constructor_parameters: Optional[List[str]]
from_class: Optional[BaseClass]
constructor_parameters: Optional[List[str]] = None
from_class: Optional[BaseClass] = None
class ClassInstance(BaseModel):
id: str
@@ -28,7 +28,7 @@ class Function(BaseModel):
name: str
type: Literal["Function"] = "Function"
description: str
parameters: Optional[List[str]]
parameters: Optional[List[str]] = None
return_type: str
is_static: Optional[bool] = False
@@ -38,7 +38,7 @@ class Variable(BaseModel):
type: Literal["Variable"] = "Variable"
description: str
is_static: Optional[bool] = False
default_value: Optional[str]
default_value: Optional[str] = None
class Operator(BaseModel):
id: str
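These `= None` defaults are needed because Pydantic v2 no longer treats `Optional[X]` as implicitly optional; without an explicit default the field becomes required. A minimal demonstration:

```python
from typing import List, Optional
from pydantic import BaseModel, ValidationError

class WithoutDefault(BaseModel):
    parameters: Optional[List[str]]  # required in Pydantic v2 despite Optional

class WithDefault(BaseModel):
    parameters: Optional[List[str]] = None  # genuinely optional, as in the fix above

try:
    WithoutDefault()
except ValidationError as error:
    print(error)          # "parameters: Field required"
print(WithDefault())      # parameters=None
```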


@@ -21,7 +21,6 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
for chunk_index, chunk in enumerate(data_chunks):
chunk_classification = chunk_classifications[chunk_index]
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
for classification_subclass in chunk_classification.label.subclass:
classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value))
@@ -39,7 +38,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
if await vector_engine.has_collection(collection_name):
existing_data_points = await vector_engine.retrieve(
collection_name,
list(set(classification_data_points)),
[str(classification_data) for classification_data in list(set(classification_data_points))],
) if len(classification_data_points) > 0 else []
existing_points_map = {point.id: True for point in existing_data_points}
@@ -60,7 +59,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
data_points.append(
DataPoint[Keyword](
id=str(classification_type_id),
payload=Keyword.parse_obj({
payload=Keyword.model_validate({
"uuid": str(classification_type_id),
"text": classification_type_label,
"chunk_id": str(data_chunk.chunk_id),
@@ -99,7 +98,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
data_points.append(
DataPoint[Keyword](
id=str(classification_subtype_id),
payload=Keyword.parse_obj({
payload=Keyword.model_validate({
"uuid": str(classification_subtype_id),
"text": classification_subtype_label,
"chunk_id": str(data_chunk.chunk_id),


@@ -56,7 +56,7 @@ class OntologyEngine:
for item in items:
flat_list.extend(await self.recursive_flatten(item, parent_id))
elif isinstance(items, dict):
model = NodeModel.parse_obj(items)
model = NodeModel.model_validate(items)
flat_list.append(await self.flatten_model(model, parent_id))
for child in model.children:
flat_list.extend(await self.recursive_flatten(child, model.node_id))
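`parse_obj` is the deprecated Pydantic v1 spelling; v2 uses `model_validate` for the same dict-to-model validation, as this and the following hunks adopt. A standalone sketch (the model is an illustrative stand-in, not the real `NodeModel`):

```python
from pydantic import BaseModel

class Node(BaseModel):  # illustrative stand-in for NodeModel
    node_id: str
    name: str

node = Node.model_validate({"node_id": "n1", "name": "root"})  # v2 replacement for parse_obj
print(node.name)  # root
```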


@@ -12,7 +12,7 @@ class NodeModel(BaseModel):
default_relationship: Optional[RelationshipModel] = None
children: List[Union[Dict[str, Any], "NodeModel"]] = Field(default_factory=list)
NodeModel.update_forward_refs()
NodeModel.model_rebuild()
class OntologyNode(BaseModel):


@@ -11,7 +11,7 @@ async def save_chunks_to_store(data_chunks: list[DocumentChunk], collection_name
# Remove and unlink existing chunks
if await vector_engine.has_collection(collection_name):
existing_chunks = [DocumentChunk.parse_obj(chunk.payload) for chunk in (await vector_engine.retrieve(
existing_chunks = [DocumentChunk.model_validate(chunk.payload) for chunk in (await vector_engine.retrieve(
collection_name,
[str(chunk.chunk_id) for chunk in data_chunks],
))]


@@ -49,7 +49,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -53,7 +53,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -54,7 +54,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -52,7 +52,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -14,9 +14,11 @@ Check available configuration options:
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.relational import get_relational_config
from cognee.infrastructure.llm.config import get_llm_config
print(get_vectordb_config().to_dict())
print(get_graph_config().to_dict())
print(get_relational_config().to_dict())
print(get_llm_config().to_dict())
```
@@ -29,8 +31,7 @@ GRAPH_DATABASE_PROVIDER = 'lancedb'
Otherwise, you can set the configuration yourself:
```python
cognee.config.llm_provider = 'ollama'
cognee.config.set_llm_provider('ollama')
```
## 🚀 Getting Started with Local Models
@@ -52,15 +53,14 @@ LLM_PROVIDER = 'ollama'
Otherwise, you can set the configuration for the model:
```python
cognee.config.llm_provider = 'ollama'
cognee.config.set_llm_provider('ollama')
```
You can also set the HOST and model name:
```python
cognee.config.llm_endpoint = "http://localhost:11434/v1"
cognee.config.llm_model = "mistral:instruct"
cognee.config.set_llm_endpoint("http://localhost:11434/v1")
cognee.config.set_llm_model("mistral:instruct")
```
@@ -73,7 +73,7 @@ LLM_PROVIDER = 'custom'
Otherwise, you can set the configuration for the model:
```python
cognee.config.llm_provider = 'custom'
cognee.config.set_llm_provider('custom')
```
You can also set the HOST and model name:


@@ -0,0 +1,39 @@
import cognee
import asyncio
from cognee.api.v1.search import SearchType
# Prerequisites:
# 1. Copy `.env.template` and rename it to `.env`.
# 2. Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field:
# LLM_API_KEY = "your_key_here"
# 3. (Optional) To minimize setup effort, set `VECTOR_DB_PROVIDER="lancedb"` in `.env`.
async def main():
# Create a clean slate for cognee -- reset data and system state
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
# cognee knowledge graph will be created based on this text
text = """
Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval.
"""
# Add the text, and make it available for cognify
await cognee.add(text)
# Use LLMs and cognee to create knowledge graph
await cognee.cognify()
# Query cognee for insights on the added text
search_results = await cognee.search(
SearchType.INSIGHTS,
{'query': 'Tell me about NLP'}
)
# Display search results
for result_text in search_results:
print(result_text)
if __name__ == '__main__':
asyncio.run(main())

log.txt (new, empty file)


@@ -537,10 +537,14 @@
"import os\n",
"\n",
"# # Setting environment variables\n",
"os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
"os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
"if \"GRAPHISTRY_USERNAME\" not in os.environ: \n",
" os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
"\n",
"os.environ[\"LLM_API_KEY\"] = \"\"\n",
"if \"GRAPHISTRY_PASSWORD\" not in os.environ: \n",
" os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
"\n",
"if \"LLM_API_KEY\" not in os.environ:\n",
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
"\n",
"os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" # \"neo4j\" or \"networkx\"\n",
"# Not needed if using networkx\n",
@@ -577,6 +581,7 @@
"\n",
"import cognee\n",
"\n",
"await cognee.prune.prune_data()\n",
"await cognee.prune.prune_system(metadata=True)"
]
},
@@ -639,7 +644,8 @@
" chunks_into_graph, \\\n",
" source_documents_to_chunks, \\\n",
" check_permissions_on_documents, \\\n",
" classify_documents\n",
" classify_documents, \\\n",
" chunk_naive_llm_classifier\n",
"from cognee.tasks.summarization import summarize_text\n",
"\n",
"async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",
@@ -667,6 +673,10 @@
" summarization_model = cognee_config.summarization_model,\n",
" collection_name = \"summaries\",\n",
" ),\n",
" Task(\n",
" chunk_naive_llm_classifier,\n",
" classification_model = cognee_config.classification_model,\n",
" ),\n",
" Task(chunk_remove_disconnected), # Remove the obsolete document chunks.\n",
" ]\n",
"\n",
@@ -876,7 +886,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "cognee-bGi0WgSG-py3.9",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -890,7 +900,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.9.6"
}
},
"nbformat": 4,

poetry.lock (generated, 2,548 changed lines): file diff suppressed because it is too large.


@@ -19,53 +19,51 @@ classifiers = [
[tool.poetry.dependencies]
python = ">=3.9.0,<3.12"
openai = "1.27.0"
openai = "1.52.0"
pydantic = "2.8.2"
python-dotenv = "1.0.1"
fastapi = "^0.109.2"
uvicorn = "0.22.0"
requests = "2.32.3"
aiohttp = "3.10.10"
typing_extensions = "4.12.2"
dspy = "2.5.25"
nest_asyncio = "1.6.0"
numpy = "1.26.4"
datasets = "3.1.0"
falkordb = "1.0.9"
boto3 = "^1.26.125"
botocore="^1.35.54"
gunicorn = "^20.1.0"
sqlalchemy = "2.0.35"
instructor = "1.3.5"
instructor = "1.6.3"
networkx = "^3.2.1"
debugpy = "1.8.2"
pyarrow = "15.0.0"
pylint = "^3.0.3"
aiosqlite = "^0.20.0"
pandas = "2.0.3"
greenlet = "^3.0.3"
ruff = "^0.2.2"
filetype = "^1.2.0"
nltk = "^3.8.1"
dlt = {extras = ["sqlalchemy"], version = "^1.2.0"}
overrides = "^7.7.0"
aiofiles = "^23.2.1"
qdrant-client = "^1.9.0"
graphistry = "^0.33.5"
tenacity = "^8.2.3"
tenacity = "^9.0.0"
weaviate-client = "4.6.7"
scikit-learn = "^1.5.0"
fastembed = "0.2.7"
pypdf = "^4.1.0"
neo4j = "^5.20.0"
jinja2 = "^3.1.3"
matplotlib = "^3.8.3"
structlog = "^24.1.0"
tiktoken = "0.7.0"
langchain_text_splitters = "0.3.2"
langsmith = "0.1.139"
langdetect = "1.0.9"
posthog = "^3.5.0"
lancedb = "0.8.0"
litellm = "1.38.10"
litellm = "1.49.1"
groq = "0.8.0"
tantivy = "^0.22.0"
tokenizers ="0.15.2"
transformers ="4.39.0"
python-multipart = "^0.0.9"
langfuse = "^2.32.0"
protobuf = "<5.0.0"
pydantic-settings = "^2.2.1"
anthropic = "^0.26.1"
pdfplumber = "^0.11.1"
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
fastapi-users = { version = "*", extras = ["sqlalchemy"] }
asyncpg = "^0.29.0"
@@ -88,6 +86,11 @@ pytest-asyncio = "^0.21.1"
coverage = "^7.3.2"
mypy = "^1.7.1"
notebook = "^7.1.1"
deptry = "^0.20.0"
debugpy = "1.8.2"
pylint = "^3.0.3"
ruff = "^0.2.2"
tweepy = "4.14.0"
[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.42"