chore: remove unused dependencies and make some optional (#661)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Boris 2025-03-25 10:19:52 +01:00 committed by GitHub
parent 08b326550a
commit d192d1fe20
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1389 additions and 1786 deletions

View file

@ -44,7 +44,7 @@ jobs:
installer-parallel: true installer-parallel: true
- name: Install dependencies - name: Install dependencies
run: poetry install --no-interaction run: poetry install --extras chromadb --no-interaction
- name: Run chromadb test - name: Run chromadb test
env: env:

View file

@ -41,7 +41,7 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
poetry install --no-interaction poetry install --extras api --no-interaction
- name: Run cognee server - name: Run cognee server
env: env:

View file

@ -2,6 +2,8 @@ FROM python:3.11-slim
# Define Poetry extras to install # Define Poetry extras to install
ARG POETRY_EXTRAS="\ ARG POETRY_EXTRAS="\
# API \
api \
# Storage & Databases \ # Storage & Databases \
filesystem postgres weaviate qdrant neo4j falkordb milvus kuzu \ filesystem postgres weaviate qdrant neo4j falkordb milvus kuzu \
# Notebooks & Interactive Environments \ # Notebooks & Interactive Environments \

View file

@ -169,9 +169,9 @@ app.include_router(get_settings_router(), prefix="/api/v1/settings", tags=["sett
app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"]) app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"])
app.include_router( codegraph_routes = get_code_pipeline_router()
get_code_pipeline_router(), prefix="/api/v1/code-pipeline", tags=["code-pipeline"] if codegraph_routes:
) app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
def start_api_server(host: str = "0.0.0.0", port: int = 8000): def start_api_server(host: str = "0.0.0.0", port: int = 8000):

View file

@ -2,10 +2,10 @@ from fastapi import Form, UploadFile, Depends
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi import APIRouter from fastapi import APIRouter
from typing import List from typing import List
import aiohttp
import subprocess import subprocess
import logging import logging
import os
import requests
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user from cognee.modules.users.methods import get_authenticated_user
@ -36,17 +36,12 @@ def get_add_router() -> APIRouter:
) )
else: else:
# Fetch and store the data from other types of URL using curl # Fetch and store the data from other types of URL using curl
async with aiohttp.ClientSession() as session: response = requests.get(data)
async with session.get(data) as resp: response.raise_for_status()
if resp.status == 200:
file_data = await resp.read() file_data = await response.content()
filename = os.path.basename(data)
with open(f".data/{filename}", "wb") as f: return await cognee_add(file_data)
f.write(file_data)
await cognee_add(
"data://.data/",
f"{data.split('/')[-1]}",
)
else: else:
await cognee_add( await cognee_add(
data, data,

View file

@ -25,6 +25,7 @@ from cognee.tasks.summarization import summarize_text
from cognee.infrastructure.llm import get_max_chunk_tokens from cognee.infrastructure.llm import get_max_chunk_tokens
monitoring = get_base_config().monitoring_tool monitoring = get_base_config().monitoring_tool
if monitoring == MonitoringTool.LANGFUSE: if monitoring == MonitoringTool.LANGFUSE:
from langfuse.decorators import observe from langfuse.decorators import observe

View file

@ -3,7 +3,6 @@ import logging
from fastapi import APIRouter from fastapi import APIRouter
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from cognee.api.DTO import InDTO from cognee.api.DTO import InDTO
from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
from cognee.modules.retrieval.code_retriever import CodeRetriever from cognee.modules.retrieval.code_retriever import CodeRetriever
from cognee.modules.storage.utils import JSONEncoder from cognee.modules.storage.utils import JSONEncoder
@ -22,11 +21,19 @@ class CodePipelineRetrievePayloadDTO(InDTO):
def get_code_pipeline_router() -> APIRouter: def get_code_pipeline_router() -> APIRouter:
try:
import run_code_graph_pipeline
except ModuleNotFoundError:
logger.error("codegraph dependencies not found. Skipping codegraph API routes.")
return None
router = APIRouter() router = APIRouter()
@router.post("/index", response_model=None) @router.post("/index", response_model=None)
async def code_pipeline_index(payload: CodePipelineIndexPayloadDTO): async def code_pipeline_index(payload: CodePipelineIndexPayloadDTO):
"""This endpoint is responsible for running the indexation on code repo.""" """This endpoint is responsible for running the indexation on code repo."""
from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
try: try:
async for result in run_code_graph_pipeline(payload.repo_path, payload.include_docs): async for result in run_code_graph_pipeline(payload.repo_path, payload.include_docs):
logger.info(result) logger.info(result)

View file

@ -1,11 +1,7 @@
from fastapi import Form, UploadFile, Depends import logging
from fastapi import Depends
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi import APIRouter from fastapi import APIRouter
from typing import List
import aiohttp
import subprocess
import logging
import os
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user from cognee.modules.users.methods import get_authenticated_user

View file

@ -1,8 +1,4 @@
import requests from typing import Any
import os
import json
import random
from typing import Optional, Any, List, Tuple
from cognee.eval_framework.benchmark_adapters.hotpot_qa_adapter import HotpotQAAdapter from cognee.eval_framework.benchmark_adapters.hotpot_qa_adapter import HotpotQAAdapter

View file

@ -1,9 +1,11 @@
import asyncio import asyncio
import httpx import aiohttp
import logging import logging
from typing import List, Optional from typing import List, Optional
import os import os
import aiohttp.http_exceptions
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException
from cognee.infrastructure.llm.tokenizer.HuggingFace import HuggingFaceTokenizer from cognee.infrastructure.llm.tokenizer.HuggingFace import HuggingFaceTokenizer
@ -48,14 +50,10 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
if self.mock: if self.mock:
return [[0.0] * self.dimensions for _ in text] return [[0.0] * self.dimensions for _ in text]
embeddings = [] embeddings = await asyncio.gather(*[self._get_embedding(prompt) for prompt in text])
async with httpx.AsyncClient() as client:
for prompt in text:
embedding = await self._get_embedding(client, prompt)
embeddings.append(embedding)
return embeddings return embeddings
async def _get_embedding(self, client: httpx.AsyncClient, prompt: str) -> List[float]: async def _get_embedding(self, prompt: str) -> List[float]:
""" """
Internal method to call the Ollama embeddings endpoint for a single prompt. Internal method to call the Ollama embeddings endpoint for a single prompt.
""" """
@ -71,13 +69,13 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
retries = 0 retries = 0
while retries < self.MAX_RETRIES: while retries < self.MAX_RETRIES:
try: try:
response = await client.post( async with aiohttp.ClientSession() as session:
self.endpoint, json=payload, headers=headers, timeout=60.0 async with session.post(
) self.endpoint, json=payload, headers=headers, timeout=60.0
response.raise_for_status() ) as response:
data = response.json() data = await response.json()
return data["embedding"] return data["embedding"]
except httpx.HTTPStatusError as e: except aiohttp.http_exceptions.HttpBadRequest as e:
logger.error(f"HTTP error on attempt {retries + 1}: {e}") logger.error(f"HTTP error on attempt {retries + 1}: {e}")
retries += 1 retries += 1
await asyncio.sleep(min(2**retries, 60)) await asyncio.sleep(min(2**retries, 60))

View file

@ -1,7 +1,6 @@
from typing import Type from typing import Type
from pydantic import BaseModel from pydantic import BaseModel
import instructor import instructor
import anthropic
from cognee.exceptions import InvalidValueError from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm.llm_interface import LLMInterface from cognee.infrastructure.llm.llm_interface import LLMInterface
@ -15,9 +14,12 @@ class AnthropicAdapter(LLMInterface):
model: str model: str
def __init__(self, max_tokens: int, model: str = None): def __init__(self, max_tokens: int, model: str = None):
import anthropic
self.aclient = instructor.patch( self.aclient = instructor.patch(
create=anthropic.Anthropic().messages.create, mode=instructor.Mode.ANTHROPIC_TOOLS create=anthropic.Anthropic().messages.create, mode=instructor.Mode.ANTHROPIC_TOOLS
) )
self.model = model self.model = model
self.max_tokens = max_tokens self.max_tokens = max_tokens

View file

@ -2,7 +2,6 @@ from typing import Type, Optional
from pydantic import BaseModel from pydantic import BaseModel
import logging import logging
import litellm import litellm
import asyncio
from litellm import acompletion, JSONSchemaValidationError from litellm import acompletion, JSONSchemaValidationError
from cognee.shared.data_models import MonitoringTool from cognee.shared.data_models import MonitoringTool
from cognee.exceptions import InvalidValueError from cognee.exceptions import InvalidValueError
@ -13,6 +12,7 @@ from cognee.base_config import get_base_config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
monitoring = get_base_config().monitoring_tool monitoring = get_base_config().monitoring_tool
if monitoring == MonitoringTool.LANGFUSE: if monitoring == MonitoringTool.LANGFUSE:
from langfuse.decorators import observe from langfuse.decorators import observe

View file

@ -13,6 +13,7 @@ from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.base_config import get_base_config from cognee.base_config import get_base_config
monitoring = get_base_config().monitoring_tool monitoring = get_base_config().monitoring_tool
if monitoring == MonitoringTool.LANGFUSE: if monitoring == MonitoringTool.LANGFUSE:
from langfuse.decorators import observe from langfuse.decorators import observe

View file

@ -4,7 +4,6 @@ from typing import Type
from instructor.exceptions import InstructorRetryException from instructor.exceptions import InstructorRetryException
from pydantic import BaseModel from pydantic import BaseModel
from tenacity import RetryError
from cognee.infrastructure.llm.get_llm_client import get_llm_client from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.prompts import read_query_prompt from cognee.infrastructure.llm.prompts import read_query_prompt
@ -36,7 +35,7 @@ async def extract_code_summary(content: str):
else: else:
try: try:
result = await extract_summary(content, response_model=SummarizedCode) result = await extract_summary(content, response_model=SummarizedCode)
except (RetryError, InstructorRetryException) as e: except InstructorRetryException as e:
logger.error("Failed to extract code summary, falling back to mock summary", exc_info=e) logger.error("Failed to extract code summary, falling back to mock summary", exc_info=e)
result = get_mock_summarized_code() result = get_mock_summarized_code()

View file

@ -1,63 +0,0 @@
import re
from nltk.downloader import download
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords, wordnet
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
def extract_topics(text: str):
    """Return up to ten vocabulary terms of *text*, ranked for its first sentence.

    The text is split into sentences with ``sent_tokenize``, every sentence is
    passed through ``WordNetLemmatizer.lemmatize`` and the result is vectorised
    with a TF-IDF model that tokenises via ``word_tokenize``. Terms are ordered
    by their TF-IDF weight in the first sentence, highest first.

    Args:
        text: Text to analyse.

    Returns:
        A list of at most ten terms; an empty list when no sentence is found.
    """
    sentences = sent_tokenize(text)
    if not sentences:
        # Nothing to vectorise: return early instead of fitting TF-IDF on an empty corpus.
        return []

    # Make sure the WordNet corpus is available, downloading it on first use.
    try:
        wordnet.ensure_loaded()
    except LookupError:
        download("wordnet")
        wordnet.ensure_loaded()

    lemmatizer = WordNetLemmatizer()
    # NOTE(review): lemmatize() receives whole sentences here, not single words — confirm intended.
    base_notation_sentences = [lemmatizer.lemmatize(sentence) for sentence in sentences]

    tf_vectorizer = TfidfVectorizer(tokenizer=word_tokenize, token_pattern=None)
    transformed_corpus = tf_vectorizer.fit_transform(base_notation_sentences)
    vocabulary = tf_vectorizer.vocabulary_

    # Only the TF-IDF weights of the first sentence (row 0) drive the ranking.
    return sorted(
        vocabulary,
        key=lambda term: transformed_corpus[0, vocabulary[term]],
        reverse=True,
    )[:10]
def clean_text(text: str):
    """Lower-case *text*, strip punctuation and normalise whitespace.

    Args:
        text: Raw input text.

    Returns:
        The lower-cased text without the characters `` ` " ' . , ; ! ? … ``,
        with every run of whitespace collapsed to a single space and no
        leading or trailing whitespace.
    """
    without_punctuation = re.sub(r"[`\"'.,;!?…]", "", text.lower())
    # Collapse whitespace *after* removing punctuation so that gaps left by
    # removed characters (e.g. "a . b") are merged as well.
    return re.sub(r"\s+", " ", without_punctuation).strip()
def remove_stop_words(text: str):
    """Return *text* with English stop words filtered out.

    The text is split on whitespace, every token that appears in NLTK's
    English stop-word list is dropped, and the remaining tokens are joined
    back together with single spaces.
    """
    # Fetch the NLTK stop-word corpus on first use if it is not available locally.
    try:
        stopwords.ensure_loaded()
    except LookupError:
        download("stopwords")
        stopwords.ensure_loaded()

    english_stop_words = set(stopwords.words("english"))
    kept_tokens = (token for token in text.split() if token not in english_stop_words)
    return " ".join(kept_tokens)
# Manual smoke test: push a sample paragraph through clean_text,
# remove_stop_words and extract_topics, then print the resulting topics.
if __name__ == "__main__":
    text = """Lorem Ipsum is simply dummy text of the printing and typesetting industry... Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book… It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
Why do we use it?
It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout! The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
"""
    print(extract_topics(remove_stop_words(clean_text(text))))

View file

@ -21,7 +21,6 @@ from cognee.infrastructure.databases.graph import get_graph_engine
from uuid import uuid4 from uuid import uuid4
import pathlib import pathlib
import nltk
from cognee.shared.exceptions import IngestionError from cognee.shared.exceptions import IngestionError
# Analytics Proxy Url, currently hosted by Vercel # Analytics Proxy Url, currently hosted by Vercel
@ -29,7 +28,10 @@ proxy_url = "https://test.prometh.ai"
def get_entities(tagged_tokens): def get_entities(tagged_tokens):
import nltk
nltk.download("maxent_ne_chunker", quiet=True) nltk.download("maxent_ne_chunker", quiet=True)
from nltk.chunk import ne_chunk from nltk.chunk import ne_chunk
return ne_chunk(tagged_tokens) return ne_chunk(tagged_tokens)
@ -37,6 +39,7 @@ def get_entities(tagged_tokens):
def extract_pos_tags(sentence): def extract_pos_tags(sentence):
"""Extract Part-of-Speech (POS) tags for words in a sentence.""" """Extract Part-of-Speech (POS) tags for words in a sentence."""
import nltk
# Ensure that the necessary NLTK resources are downloaded # Ensure that the necessary NLTK resources are downloaded
nltk.download("words", quiet=True) nltk.download("words", quiet=True)
@ -308,37 +311,6 @@ def embed_logo(p, layout_scale, logo_alpha, position):
) )
def style_and_render_graph(p, G, layout_positions, node_attribute, node_colors, centrality):
    """
    Apply styling and render the graph into the plot.

    Args:
        p: Plot whose ``renderers`` list receives the graph renderer.
        G: Graph passed to ``from_networkx``; its ``nodes()`` determine the radii order.
        layout_positions: Layout handed to ``from_networkx``.
        node_attribute: Not used by this function; kept so existing callers keep working.
        node_colors: Fill colours stored for the nodes under ``"fill_color"``.
        centrality: Mapping from node to centrality score, used to scale node radii.

    Returns:
        The graph renderer that was appended to ``p.renderers``.
    """
    # bokeh is imported lazily, and only the names this function actually uses.
    from bokeh.models import Circle, MultiLine
    from bokeh.plotting import from_networkx

    graph_renderer = from_networkx(G, layout_positions)

    # Radius grows linearly with centrality on top of a small base size.
    node_radii = [0.02 + 0.1 * centrality[node] for node in G.nodes()]
    graph_renderer.node_renderer.data_source.data["radius"] = node_radii
    graph_renderer.node_renderer.data_source.data["fill_color"] = node_colors

    graph_renderer.node_renderer.glyph = Circle(
        radius="radius",
        fill_color="fill_color",
        fill_alpha=0.9,
        line_color="#000000",
        line_width=1.5,
    )
    graph_renderer.edge_renderer.glyph = MultiLine(
        line_color="#000000",
        line_alpha=0.3,
        line_width=1.5,
    )

    p.renderers.append(graph_renderer)
    return graph_renderer
def graph_to_tuple(graph): def graph_to_tuple(graph):
""" """
Converts a networkx graph to a tuple of (nodes, edges). Converts a networkx graph to a tuple of (nodes, edges).

View file

@ -1,14 +1,12 @@
import os import os
import logging
import aiofiles
import importlib import importlib
from typing import AsyncGenerator, Optional from typing import AsyncGenerator, Optional
from uuid import NAMESPACE_OID, uuid5 from uuid import NAMESPACE_OID, uuid5
import tree_sitter_python as tspython import tree_sitter_python as tspython
from tree_sitter import Language, Node, Parser, Tree from tree_sitter import Language, Node, Parser, Tree
import aiofiles
import logging
from cognee.low_level import DataPoint from cognee.low_level import DataPoint
from cognee.shared.CodeGraphEntities import ( from cognee.shared.CodeGraphEntities import (
CodeFile, CodeFile,
@ -19,15 +17,15 @@ from cognee.shared.CodeGraphEntities import (
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
PY_LANGUAGE = Language(tspython.language())
source_code_parser = Parser(PY_LANGUAGE)
class FileParser: class FileParser:
def __init__(self): def __init__(self):
self.parsed_files = {} self.parsed_files = {}
async def parse_file(self, file_path: str) -> tuple[str, Tree]: async def parse_file(self, file_path: str) -> tuple[str, Tree]:
PY_LANGUAGE = Language(tspython.language())
source_code_parser = Parser(PY_LANGUAGE)
if file_path not in self.parsed_files: if file_path not in self.parsed_files:
source_code = await get_source_code(file_path) source_code = await get_source_code(file_path)
source_code_tree = source_code_parser.parse(bytes(source_code, "utf-8")) source_code_tree = source_code_parser.parse(bytes(source_code, "utf-8"))

View file

@ -8,7 +8,6 @@ from uuid import NAMESPACE_OID, uuid5
from cognee.infrastructure.engine import DataPoint from cognee.infrastructure.engine import DataPoint
from cognee.shared.CodeGraphEntities import CodeFile, Repository from cognee.shared.CodeGraphEntities import CodeFile, Repository
from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies
async def get_source_code_files(repo_path): async def get_source_code_files(repo_path):
@ -75,6 +74,9 @@ async def get_repo_file_dependencies(
for chunk_number in range(number_of_chunks) for chunk_number in range(number_of_chunks)
] ]
# Codegraph dependencies are not installed by default, so we import where we use them.
from cognee.tasks.repo_processor.get_local_dependencies import get_local_script_dependencies
for start_range, end_range in chunk_ranges: for start_range, end_range in chunk_ranges:
# with ProcessPoolExecutor(max_workers=12) as executor: # with ProcessPoolExecutor(max_workers=12) as executor:
tasks = [ tasks = [

2901
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -20,53 +20,51 @@ classifiers = [
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.10,<=3.13" python = ">=3.10,<=3.13"
openai = "^1.59.4" openai = "^1.59.4"
pydantic = "2.10.5"
python-dotenv = "1.0.1" python-dotenv = "1.0.1"
fastapi = "0.115.7" pydantic = "2.10.5"
uvicorn = "0.34.0" pydantic-settings = "^2.2.1"
requests = "2.32.3"
aiohttp = "3.10.10"
typing_extensions = "4.12.2" typing_extensions = "4.12.2"
nest_asyncio = "1.6.0" nltk = "3.9.1"
numpy = "1.26.4" numpy = "1.26.4"
datasets = "3.1.0" pandas = "2.2.3"
falkordb = {version = "1.0.9", optional = true}
kuzu = {version = "0.8.2", optional = true}
boto3 = "^1.26.125" boto3 = "^1.26.125"
botocore="^1.35.54" botocore="^1.35.54"
gunicorn = "^20.1.0"
sqlalchemy = "2.0.36" sqlalchemy = "2.0.36"
instructor = "1.7.2"
networkx = "^3.2.1"
aiosqlite = "^0.20.0" aiosqlite = "^0.20.0"
pandas = "2.2.3" tiktoken = "<=0.9.0"
litellm = ">=1.57.4"
instructor = "1.7.2"
langfuse = "^2.32.0"
filetype = "^1.2.0" filetype = "^1.2.0"
dlt = {extras = ["sqlalchemy"], version = "^1.4.1"} aiohttp = "^3.11.14"
aiofiles = "^23.2.1" aiofiles = "^23.2.1"
qdrant-client = {version = "^1.9.0", optional = true} owlready2 = "^0.47"
graphistry = "^0.33.5" graphistry = "^0.33.5"
tenacity = "^9.0.0"
weaviate-client = {version = "4.9.6", optional = true}
scikit-learn = "^1.5.0"
pypdf = ">=4.1.0,<6.0.0" pypdf = ">=4.1.0,<6.0.0"
neo4j = {version = "^5.20.0", optional = true}
jinja2 = "^3.1.3" jinja2 = "^3.1.3"
matplotlib = "^3.8.3" matplotlib = "^3.8.3"
tiktoken = "<=0.9.0" networkx = "^3.2.1"
lancedb = "0.16.0"
alembic = "^1.13.3"
pre-commit = "^4.0.1"
scikit-learn = "^1.6.1"
fastapi = {version = "0.115.7", optional = true}
fastapi-users = {version = "14.0.0", extras = ["sqlalchemy"]}
uvicorn = {version = "0.34.0", optional = true}
gunicorn = {version = "^20.1.0", optional = true}
dlt = {extras = ["sqlalchemy"], version = "^1.4.1"}
qdrant-client = {version = "^1.9.0", optional = true}
weaviate-client = {version = "4.9.6", optional = true}
neo4j = {version = "^5.20.0", optional = true}
falkordb = {version = "1.0.9", optional = true}
kuzu = {version = "0.8.2", optional = true}
chromadb = {version = "^0.6.0", optional = true}
langchain_text_splitters = {version = "0.3.2", optional = true} langchain_text_splitters = {version = "0.3.2", optional = true}
langsmith = {version = "0.2.3", optional = true} langsmith = {version = "0.2.3", optional = true}
langdetect = "1.0.9"
posthog = {version = "^3.5.0", optional = true} posthog = {version = "^3.5.0", optional = true}
lancedb = "0.16.0"
chromadb = "^0.6.0"
litellm = ">=1.57.4"
groq = {version = "0.8.0", optional = true} groq = {version = "0.8.0", optional = true}
langfuse = "^2.32.0" anthropic = {version = "^0.26.1", optional = true}
pydantic-settings = "^2.2.1"
anthropic = "^0.26.1"
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"} sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
fastapi-users = {version = "14.0.0", extras = ["sqlalchemy"]}
alembic = "^1.13.3"
asyncpg = {version = "0.30.0", optional = true} asyncpg = {version = "0.30.0", optional = true}
pgvector = {version = "^0.3.5", optional = true} pgvector = {version = "^0.3.5", optional = true}
psycopg2 = {version = "^2.9.10", optional = true} psycopg2 = {version = "^2.9.10", optional = true}
@ -75,24 +73,18 @@ deepeval = {version = "^2.0.1", optional = true}
transformers = {version = "^4.46.3", optional = true} transformers = {version = "^4.46.3", optional = true}
pymilvus = {version = "^2.5.0", optional = true} pymilvus = {version = "^2.5.0", optional = true}
unstructured = { extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], version = "^0.16.13", optional = true } unstructured = { extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], version = "^0.16.13", optional = true }
pre-commit = "^4.0.1"
httpx = "0.27.0"
bokeh="^3.6.2"
nltk = "3.9.1"
google-generativeai = {version = "^0.8.4", optional = true}
mistral-common = {version = "^1.5.2", optional = true} mistral-common = {version = "^1.5.2", optional = true}
fastembed = {version = "^0.6.0", optional = true, markers = "python_version < '3.13'"} fastembed = {version = "^0.6.0", optional = true, markers = "python_version < '3.13'"}
tree-sitter = {version = "^0.24.0", optional = true} tree-sitter = {version = "^0.24.0", optional = true}
tree-sitter-python = {version = "^0.23.6", optional = true} tree-sitter-python = {version = "^0.23.6", optional = true}
plotly = {version = "^6.0.0", optional = true} plotly = {version = "^6.0.0", optional = true}
gdown = {version = "^5.2.0", optional = true} gdown = {version = "^5.2.0", optional = true}
pyside6 = {version = "^6.8.2.1", optional = true}
qasync = {version = "^0.27.1", optional = true} qasync = {version = "^0.27.1", optional = true}
graphiti-core = {version = "^0.7.0", optional = true} graphiti-core = {version = "^0.7.0", optional = true}
owlready2 = "^0.47"
[tool.poetry.extras] [tool.poetry.extras]
api = ["fastapi", "fastapi-users", "uvicorn", "gunicorn"]
filesystem = ["s3fs", "botocore"] filesystem = ["s3fs", "botocore"]
weaviate = ["weaviate-client"] weaviate = ["weaviate-client"]
qdrant = ["qdrant-client"] qdrant = ["qdrant-client"]
@ -105,12 +97,14 @@ gemini = ["google-generativeai"]
huggingface = ["transformers"] huggingface = ["transformers"]
ollama = ["transformers"] ollama = ["transformers"]
mistral = ["mistral-common"] mistral = ["mistral-common"]
anthropic = ["anthropic"]
deepeval = ["deepeval"] deepeval = ["deepeval"]
posthog = ["posthog"] posthog = ["posthog"]
falkordb = ["falkordb"] falkordb = ["falkordb"]
kuzu = ["kuzu"] kuzu = ["kuzu"]
groq = ["groq"] groq = ["groq"]
milvus = ["pymilvus"] milvus = ["pymilvus"]
chromadb = ["chromadb"]
docs = ["unstructured"] docs = ["unstructured"]
codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"] codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
evals = ["plotly", "gdown"] evals = ["plotly", "gdown"]