Merge pull request #176 from topoteretes/fix/integration-test-warnings

COG-485 - Fix/integration test warnings
0xideas 2024-11-05 13:56:58 +01:00 committed by GitHub
commit d3d49b64be
21 changed files with 1370 additions and 1292 deletions


@@ -146,7 +146,7 @@ class DatasetDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
updated_at: Optional[datetime] = None
owner_id: UUID
@app.get("/api/v1/datasets", response_model = list[DatasetDTO])
@@ -200,7 +200,7 @@ class DataDTO(OutDTO):
id: UUID
name: str
created_at: datetime
updated_at: Optional[datetime]
updated_at: Optional[datetime] = None
extension: str
mime_type: str
raw_data_location: str
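
Note: in Pydantic v2 an Optional annotation no longer implies a default of None, so a field declared as Optional[datetime] without a default is still required. A minimal sketch of the fixed pattern, using a hypothetical ExampleDTO rather than the project's models:

from datetime import datetime
from typing import Optional
from pydantic import BaseModel

class ExampleDTO(BaseModel):
    created_at: datetime
    # Pydantic v2: Optional[...] alone does not supply a default, so "= None" is needed.
    updated_at: Optional[datetime] = None

ExampleDTO(created_at=datetime.now())  # valid; updated_at defaults to None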


@@ -1,25 +0,0 @@
from typing import List, Optional
from fastembed import TextEmbedding
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
class FastembedEmbeddingEngine(EmbeddingEngine):
embedding_model: str
embedding_dimensions: int
def __init__(
self,
embedding_model: Optional[str] = "BAAI/bge-large-en-v1.5",
embedding_dimensions: Optional[int] = 1024,
):
self.embedding_model = embedding_model
self.embedding_dimensions = embedding_dimensions
async def embed_text(self, text: List[str]) -> List[float]:
embedding_model = TextEmbedding(model_name = self.embedding_model, cache_dir = get_absolute_path("cache/embeddings"))
embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text)))
return embeddings_list
def get_vector_size(self) -> int:
return self.embedding_dimensions


@@ -1,7 +1,8 @@
from typing import BinaryIO
from pypdf import PdfReader
import filetype
def extract_text_from_file(file: BinaryIO, file_type) -> str:
def extract_text_from_file(file: BinaryIO, file_type: filetype.Type) -> str:
"""Extract text from a file"""
if file_type.extension == "pdf":
reader = PdfReader(stream = file)
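
The new annotation refers to the object returned by the filetype library's guess() helper, which exposes the extension and MIME type checked above. A rough illustration (the file name is hypothetical):

import filetype

kind = filetype.guess("document.pdf")  # returns a filetype.Type instance, or None if unknown
if kind is not None:
    print(kind.extension, kind.mime)   # e.g. "pdf", "application/pdf"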


@@ -1,5 +0,0 @@
import os
def get_file_size(file_path: str):
"""Get the size of a file"""
return os.path.getsize(file_path)


@@ -1,4 +1,3 @@
import dsp
import dspy
from dspy.evaluate.evaluate import Evaluate
from dspy.primitives.example import Example


@@ -1,4 +1,3 @@
import dsp
import dspy
from dspy.teleprompt import BootstrapFewShot
from dspy.primitives.example import Example


@@ -5,7 +5,7 @@ from .models.Task import Task
class PipelineConfig(BaseModel):
batch_count: int = 10
description: Optional[str]
description: Optional[str] = None
class Pipeline():
id: UUID = uuid4()


@@ -1,8 +1,8 @@
from typing import Any, Callable, Generator
from typing import Any, Callable, Generator, List
import asyncio
from ..tasks.Task import Task
def run_tasks_parallel(tasks: [Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
def run_tasks_parallel(tasks: List[Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
async def parallel_run(*args, **kwargs):
parallel_tasks = [asyncio.create_task(task.run(*args, **kwargs)) for task in tasks]
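Aside from the annotation fix ([Task] is a list literal, not a usable type hint; List[Task] is the typed form), the underlying pattern is to schedule every task and await them together. A rough sketch of that pattern with plain coroutines, not the project's Task class:

import asyncio

async def work(n: int) -> int:
    await asyncio.sleep(0.1)
    return n * 2

async def run_parallel() -> list[int]:
    # Schedule all coroutines concurrently, then wait for every result.
    tasks = [asyncio.create_task(work(n)) for n in range(3)]
    return await asyncio.gather(*tasks)

print(asyncio.run(run_parallel()))  # [0, 2, 4]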


@@ -18,7 +18,7 @@ class Directory(BaseModel):
directories: List['Directory'] = []
# Allows recursive Directory Model
Directory.update_forward_refs()
Directory.model_rebuild()
class RepositoryProperties(BaseModel):
custom_properties: Optional[Dict[str, Any]] = None
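
update_forward_refs() is the Pydantic v1 name and is deprecated in v2; model_rebuild() is the replacement that resolves the self-referencing "Directory" annotation. A minimal sketch with a hypothetical recursive model:

from typing import List
from pydantic import BaseModel

class Node(BaseModel):
    name: str
    children: List["Node"] = []

# Pydantic v2: resolve the forward reference; v1 code called Node.update_forward_refs().
Node.model_rebuild()

Node(name="root", children=[Node(name="leaf")])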


@@ -6,15 +6,15 @@ class BaseClass(BaseModel):
name: str
type: Literal["Class"] = "Class"
description: str
constructor_parameters: Optional[List[str]]
constructor_parameters: Optional[List[str]] = None
class Class(BaseModel):
id: str
name: str
type: Literal["Class"] = "Class"
description: str
constructor_parameters: Optional[List[str]]
from_class: Optional[BaseClass]
constructor_parameters: Optional[List[str]] = None
from_class: Optional[BaseClass] = None
class ClassInstance(BaseModel):
id: str
@@ -28,7 +28,7 @@ class Function(BaseModel):
name: str
type: Literal["Function"] = "Function"
description: str
parameters: Optional[List[str]]
parameters: Optional[List[str]] = None
return_type: str
is_static: Optional[bool] = False
@@ -38,7 +38,7 @@ class Variable(BaseModel):
type: Literal["Variable"] = "Variable"
description: str
is_static: Optional[bool] = False
default_value: Optional[str]
default_value: Optional[str] = None
class Operator(BaseModel):
id: str


@@ -59,7 +59,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
data_points.append(
DataPoint[Keyword](
id=str(classification_type_id),
payload=Keyword.parse_obj({
payload=Keyword.model_validate({
"uuid": str(classification_type_id),
"text": classification_type_label,
"chunk_id": str(data_chunk.chunk_id),
@@ -98,7 +98,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
data_points.append(
DataPoint[Keyword](
id=str(classification_subtype_id),
payload=Keyword.parse_obj({
payload=Keyword.model_validate({
"uuid": str(classification_subtype_id),
"text": classification_subtype_label,
"chunk_id": str(data_chunk.chunk_id),


@@ -56,7 +56,7 @@ class OntologyEngine:
for item in items:
flat_list.extend(await self.recursive_flatten(item, parent_id))
elif isinstance(items, dict):
model = NodeModel.parse_obj(items)
model = NodeModel.model_validate(items)
flat_list.append(await self.flatten_model(model, parent_id))
for child in model.children:
flat_list.extend(await self.recursive_flatten(child, model.node_id))


@@ -12,7 +12,7 @@ class NodeModel(BaseModel):
default_relationship: Optional[RelationshipModel] = None
children: List[Union[Dict[str, Any], "NodeModel"]] = Field(default_factory=list)
NodeModel.update_forward_refs()
NodeModel.model_rebuild()
class OntologyNode(BaseModel):


@@ -11,7 +11,7 @@ async def save_chunks_to_store(data_chunks: list[DocumentChunk], collection_name
# Remove and unlink existing chunks
if await vector_engine.has_collection(collection_name):
existing_chunks = [DocumentChunk.parse_obj(chunk.payload) for chunk in (await vector_engine.retrieve(
existing_chunks = [DocumentChunk.model_validate(chunk.payload) for chunk in (await vector_engine.retrieve(
collection_name,
[str(chunk.chunk_id) for chunk in data_chunks],
))]


@@ -49,7 +49,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -53,7 +53,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -54,7 +54,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")


@@ -52,7 +52,7 @@ async def main():
search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\n\Extracted summaries are:\n")
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")

log.txt (normal file, 0 changes)

poetry.lock (generated, 2548 changes): diff suppressed because it is too large


@@ -19,53 +19,51 @@ classifiers = [
[tool.poetry.dependencies]
python = ">=3.9.0,<3.12"
openai = "1.27.0"
openai = "1.52.0"
pydantic = "2.8.2"
python-dotenv = "1.0.1"
fastapi = "^0.109.2"
uvicorn = "0.22.0"
requests = "2.32.3"
aiohttp = "3.10.10"
typing_extensions = "4.12.2"
dspy = "2.5.25"
nest_asyncio = "1.6.0"
numpy = "1.26.4"
datasets = "3.1.0"
falkordb = "1.0.9"
boto3 = "^1.26.125"
botocore="^1.35.54"
gunicorn = "^20.1.0"
sqlalchemy = "2.0.35"
instructor = "1.3.5"
instructor = "1.6.3"
networkx = "^3.2.1"
debugpy = "1.8.2"
pyarrow = "15.0.0"
pylint = "^3.0.3"
aiosqlite = "^0.20.0"
pandas = "2.0.3"
greenlet = "^3.0.3"
ruff = "^0.2.2"
filetype = "^1.2.0"
nltk = "^3.8.1"
dlt = {extras = ["sqlalchemy"], version = "^1.2.0"}
overrides = "^7.7.0"
aiofiles = "^23.2.1"
qdrant-client = "^1.9.0"
graphistry = "^0.33.5"
tenacity = "^8.2.3"
tenacity = "^9.0.0"
weaviate-client = "4.6.7"
scikit-learn = "^1.5.0"
fastembed = "0.2.7"
pypdf = "^4.1.0"
neo4j = "^5.20.0"
jinja2 = "^3.1.3"
matplotlib = "^3.8.3"
structlog = "^24.1.0"
tiktoken = "0.7.0"
langchain_text_splitters = "0.3.2"
langsmith = "0.1.139"
langdetect = "1.0.9"
posthog = "^3.5.0"
lancedb = "0.8.0"
litellm = "1.38.10"
litellm = "1.49.1"
groq = "0.8.0"
tantivy = "^0.22.0"
tokenizers ="0.15.2"
transformers ="4.39.0"
python-multipart = "^0.0.9"
langfuse = "^2.32.0"
protobuf = "<5.0.0"
pydantic-settings = "^2.2.1"
anthropic = "^0.26.1"
pdfplumber = "^0.11.1"
sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
fastapi-users = { version = "*", extras = ["sqlalchemy"] }
asyncpg = "^0.29.0"
@@ -88,6 +86,11 @@ pytest-asyncio = "^0.21.1"
coverage = "^7.3.2"
mypy = "^1.7.1"
notebook = "^7.1.1"
deptry = "^0.20.0"
debugpy = "1.8.2"
pylint = "^3.0.3"
ruff = "^0.2.2"
tweepy = "4.14.0"
[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.42"