Merge pull request #176 from topoteretes/fix/integration-test-warnings

COG-485 - Fix/integration test warnings

Commit d3d49b64be
21 changed files with 1370 additions and 1292 deletions
@@ -146,7 +146,7 @@ class DatasetDTO(OutDTO):
     id: UUID
     name: str
     created_at: datetime
-    updated_at: Optional[datetime]
+    updated_at: Optional[datetime] = None
     owner_id: UUID

 @app.get("/api/v1/datasets", response_model = list[DatasetDTO])
@@ -200,7 +200,7 @@ class DataDTO(OutDTO):
     id: UUID
     name: str
     created_at: datetime
-    updated_at: Optional[datetime]
+    updated_at: Optional[datetime] = None
     extension: str
     mime_type: str
     raw_data_location: str
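Both DTO edits address the same Pydantic v2 behavior change: Optional[X] no longer implies a default of None, so an Optional field without an explicit default is treated as required. A minimal sketch of the difference (model and field names are illustrative):

from datetime import datetime
from typing import Optional
from pydantic import BaseModel, ValidationError

class WithDefault(BaseModel):
    updated_at: Optional[datetime] = None  # optional, defaults to None

class WithoutDefault(BaseModel):
    updated_at: Optional[datetime]  # Pydantic v2 treats this as required

print(WithDefault())  # updated_at=None

try:
    WithoutDefault()
except ValidationError as error:
    print(error)  # "Field required" for updated_at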
@ -1,25 +0,0 @@
|
||||||
from typing import List, Optional
|
|
||||||
from fastembed import TextEmbedding
|
|
||||||
from cognee.root_dir import get_absolute_path
|
|
||||||
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
|
|
||||||
|
|
||||||
class FastembedEmbeddingEngine(EmbeddingEngine):
|
|
||||||
embedding_model: str
|
|
||||||
embedding_dimensions: int
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
embedding_model: Optional[str] = "BAAI/bge-large-en-v1.5",
|
|
||||||
embedding_dimensions: Optional[int] = 1024,
|
|
||||||
):
|
|
||||||
self.embedding_model = embedding_model
|
|
||||||
self.embedding_dimensions = embedding_dimensions
|
|
||||||
|
|
||||||
async def embed_text(self, text: List[str]) -> List[float]:
|
|
||||||
embedding_model = TextEmbedding(model_name = self.embedding_model, cache_dir = get_absolute_path("cache/embeddings"))
|
|
||||||
embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text)))
|
|
||||||
|
|
||||||
return embeddings_list
|
|
||||||
|
|
||||||
def get_vector_size(self) -> int:
|
|
||||||
return self.embedding_dimensions
|
|
||||||
|
|
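This removal pairs with dropping fastembed from pyproject.toml below; embedding presumably routes through one of the remaining engines. For reference, a replacement only needs the two methods the deleted class implemented. A minimal sketch, assuming the EmbeddingEngine base class exposes exactly this interface (the class name is hypothetical; note the deleted code annotated embed_text as List[float] even though it returned one vector per input string, so the sketch uses the more precise List[List[float]]):

from typing import List

from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine

class StubEmbeddingEngine(EmbeddingEngine):
    # Hypothetical stand-in: returns zero vectors instead of calling a model.
    def __init__(self, embedding_dimensions: int = 1024):
        self.embedding_dimensions = embedding_dimensions

    async def embed_text(self, text: List[str]) -> List[List[float]]:
        # One vector per input string; a real engine would call its backend here.
        return [[0.0] * self.embedding_dimensions for _ in text]

    def get_vector_size(self) -> int:
        return self.embedding_dimensions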
@@ -1,7 +1,8 @@
 from typing import BinaryIO
 from pypdf import PdfReader
+import filetype

-def extract_text_from_file(file: BinaryIO, file_type) -> str:
+def extract_text_from_file(file: BinaryIO, file_type: filetype.Type) -> str:
     """Extract text from a file"""
     if file_type.extension == "pdf":
         reader = PdfReader(stream = file)
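filetype.Type is what filetype.guess() returns; it carries the .extension and .mime attributes this function reads, so the new annotation documents the actual contract. A usage sketch (the path is illustrative):

import filetype

kind = filetype.guess("document.pdf")  # filetype.Type, or None if unrecognized

if kind is not None and kind.extension == "pdf":
    with open("document.pdf", "rb") as file:
        text = extract_text_from_file(file, kind)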
@@ -1,5 +0,0 @@
-import os
-
-def get_file_size(file_path: str):
-    """Get the size of a file"""
-    return os.path.getsize(file_path)
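Since this helper only wrapped a single stdlib call, any remaining call sites can presumably call os.path.getsize(file_path) directly.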
@@ -1,4 +1,3 @@
-import dsp
 import dspy
 from dspy.evaluate.evaluate import Evaluate
 from dspy.primitives.example import Example
@@ -1,4 +1,3 @@
-import dsp
 import dspy
 from dspy.teleprompt import BootstrapFewShot
 from dspy.primitives.example import Example
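Both evaluation scripts drop the standalone import dsp, presumably because dspy 2.5.25 (pinned in pyproject.toml below) no longer requires the separate dsp package; the import was unused in these scripts in any case.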
@@ -5,7 +5,7 @@ from .models.Task import Task

 class PipelineConfig(BaseModel):
     batch_count: int = 10
-    description: Optional[str]
+    description: Optional[str] = None

 class Pipeline():
     id: UUID = uuid4()
@@ -1,8 +1,8 @@
-from typing import Any, Callable, Generator
+from typing import Any, Callable, Generator, List
 import asyncio
 from ..tasks.Task import Task

-def run_tasks_parallel(tasks: [Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
+def run_tasks_parallel(tasks: List[Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
     async def parallel_run(*args, **kwargs):
         parallel_tasks = [asyncio.create_task(task.run(*args, **kwargs)) for task in tasks]

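The old annotation [Task] is not a type at all: it evaluates to a one-element list containing the Task class, which type checkers and annotation-introspecting tools flag; List[Task] (or list[Task] on Python 3.9+) is the valid spelling. A minimal demonstration:

from typing import List

class Task: ...

def run_a(tasks: [Task]) -> None: ...      # annotation is a runtime list literal
def run_b(tasks: List[Task]) -> None: ...  # proper generic type

print(run_a.__annotations__["tasks"])  # [<class '__main__.Task'>]
print(run_b.__annotations__["tasks"])  # typing.List[__main__.Task]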
@@ -18,7 +18,7 @@ class Directory(BaseModel):
     directories: List['Directory'] = []

 # Allows recursive Directory Model
-Directory.update_forward_refs()
+Directory.model_rebuild()

 class RepositoryProperties(BaseModel):
     custom_properties: Optional[Dict[str, Any]] = None
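update_forward_refs() is the Pydantic v1 spelling and is deprecated in v2; model_rebuild() is the replacement that resolves the self-referential 'Directory' annotation. A self-contained sketch:

from typing import List
from pydantic import BaseModel

class Directory(BaseModel):
    name: str
    directories: List["Directory"] = []  # forward reference to itself

Directory.model_rebuild()  # v2 API; update_forward_refs() would warn

root = Directory(name="root", directories=[{"name": "child"}])
print(root.directories[0].name)  # child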
@@ -6,15 +6,15 @@ class BaseClass(BaseModel):
     name: str
     type: Literal["Class"] = "Class"
     description: str
-    constructor_parameters: Optional[List[str]]
+    constructor_parameters: Optional[List[str]] = None

 class Class(BaseModel):
     id: str
     name: str
     type: Literal["Class"] = "Class"
     description: str
-    constructor_parameters: Optional[List[str]]
-    from_class: Optional[BaseClass]
+    constructor_parameters: Optional[List[str]] = None
+    from_class: Optional[BaseClass] = None

 class ClassInstance(BaseModel):
     id: str
@@ -28,7 +28,7 @@ class Function(BaseModel):
     name: str
     type: Literal["Function"] = "Function"
     description: str
-    parameters: Optional[List[str]]
+    parameters: Optional[List[str]] = None
     return_type: str
     is_static: Optional[bool] = False

@@ -38,7 +38,7 @@ class Variable(BaseModel):
     type: Literal["Variable"] = "Variable"
     description: str
     is_static: Optional[bool] = False
-    default_value: Optional[str]
+    default_value: Optional[str] = None

 class Operator(BaseModel):
     id: str
@@ -59,7 +59,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
         data_points.append(
             DataPoint[Keyword](
                 id=str(classification_type_id),
-                payload=Keyword.parse_obj({
+                payload=Keyword.model_validate({
                     "uuid": str(classification_type_id),
                     "text": classification_type_label,
                     "chunk_id": str(data_chunk.chunk_id),
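parse_obj is deprecated in Pydantic v2 and emits exactly the deprecation warnings this PR targets; model_validate is the direct replacement with the same dict-in, model-out behavior. A minimal sketch:

from pydantic import BaseModel

class Keyword(BaseModel):
    uuid: str
    text: str
    chunk_id: str

payload = {"uuid": "1", "text": "graph", "chunk_id": "2"}

# Pydantic v1 spelling; in v2 it still works but emits
# PydanticDeprecatedSince20 warnings:
# keyword = Keyword.parse_obj(payload)

# Pydantic v2 spelling, warning-free:
keyword = Keyword.model_validate(payload)
print(keyword.text)  # graph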
@@ -98,7 +98,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
         data_points.append(
             DataPoint[Keyword](
                 id=str(classification_subtype_id),
-                payload=Keyword.parse_obj({
+                payload=Keyword.model_validate({
                     "uuid": str(classification_subtype_id),
                     "text": classification_subtype_label,
                     "chunk_id": str(data_chunk.chunk_id),
@@ -56,7 +56,7 @@ class OntologyEngine:
             for item in items:
                 flat_list.extend(await self.recursive_flatten(item, parent_id))
         elif isinstance(items, dict):
-            model = NodeModel.parse_obj(items)
+            model = NodeModel.model_validate(items)
             flat_list.append(await self.flatten_model(model, parent_id))
             for child in model.children:
                 flat_list.extend(await self.recursive_flatten(child, model.node_id))
@@ -12,7 +12,7 @@ class NodeModel(BaseModel):
     default_relationship: Optional[RelationshipModel] = None
     children: List[Union[Dict[str, Any], "NodeModel"]] = Field(default_factory=list)

-NodeModel.update_forward_refs()
+NodeModel.model_rebuild()


 class OntologyNode(BaseModel):
@@ -11,7 +11,7 @@ async def save_chunks_to_store(data_chunks: list[DocumentChunk], collection_name

     # Remove and unlink existing chunks
     if await vector_engine.has_collection(collection_name):
-        existing_chunks = [DocumentChunk.parse_obj(chunk.payload) for chunk in (await vector_engine.retrieve(
+        existing_chunks = [DocumentChunk.model_validate(chunk.payload) for chunk in (await vector_engine.retrieve(
             collection_name,
             [str(chunk.chunk_id) for chunk in data_chunks],
         ))]
@@ -49,7 +49,7 @@ async def main():

     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."
-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")

     for result in search_results:
         print(f"{result}\n")
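The literal "\n\Extracted ..." contains \E, which is not a recognized escape sequence; CPython flags invalid escapes at compile time (DeprecationWarning through 3.11, SyntaxWarning from 3.12), which is precisely the kind of test-run noise this PR clears. A quick demonstration:

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Compiling a literal containing "\E" triggers the invalid-escape warning,
    # just as importing a test module containing it would.
    compile('text = "\\n\\Extracted summaries are:\\n"', "<demo>", "exec")

for warning in caught:
    print(warning.category.__name__, warning.message)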
@@ -53,7 +53,7 @@ async def main():

     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."
-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")

     for result in search_results:
         print(f"{result}\n")
@@ -54,7 +54,7 @@ async def main():

     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."
-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")

     for result in search_results:
         print(f"{result}\n")
@@ -52,7 +52,7 @@ async def main():

     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."
-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")

     for result in search_results:
         print(f"{result}\n")
log.txt: new file, 0 changes
poetry.lock: 2548 changes, generated file (diff suppressed because it is too large)
@@ -19,53 +19,51 @@ classifiers = [

 [tool.poetry.dependencies]
 python = ">=3.9.0,<3.12"
-openai = "1.27.0"
+openai = "1.52.0"
 pydantic = "2.8.2"
 python-dotenv = "1.0.1"
 fastapi = "^0.109.2"
 uvicorn = "0.22.0"
+requests = "2.32.3"
+aiohttp = "3.10.10"
+typing_extensions = "4.12.2"
+dspy = "2.5.25"
+nest_asyncio = "1.6.0"
+numpy = "1.26.4"
+datasets = "3.1.0"
+falkordb = "1.0.9"
 boto3 = "^1.26.125"
+botocore="^1.35.54"
 gunicorn = "^20.1.0"
 sqlalchemy = "2.0.35"
-instructor = "1.3.5"
+instructor = "1.6.3"
 networkx = "^3.2.1"
-debugpy = "1.8.2"
-pyarrow = "15.0.0"
-pylint = "^3.0.3"
 aiosqlite = "^0.20.0"
 pandas = "2.0.3"
-greenlet = "^3.0.3"
-ruff = "^0.2.2"
 filetype = "^1.2.0"
 nltk = "^3.8.1"
 dlt = {extras = ["sqlalchemy"], version = "^1.2.0"}
-overrides = "^7.7.0"
 aiofiles = "^23.2.1"
 qdrant-client = "^1.9.0"
 graphistry = "^0.33.5"
-tenacity = "^8.2.3"
+tenacity = "^9.0.0"
 weaviate-client = "4.6.7"
 scikit-learn = "^1.5.0"
-fastembed = "0.2.7"
 pypdf = "^4.1.0"
 neo4j = "^5.20.0"
 jinja2 = "^3.1.3"
 matplotlib = "^3.8.3"
-structlog = "^24.1.0"
 tiktoken = "0.7.0"
+langchain_text_splitters = "0.3.2"
+langsmith = "0.1.139"
+langdetect = "1.0.9"
 posthog = "^3.5.0"
 lancedb = "0.8.0"
-litellm = "1.38.10"
+litellm = "1.49.1"
 groq = "0.8.0"
-tantivy = "^0.22.0"
-tokenizers ="0.15.2"
-transformers ="4.39.0"
-python-multipart = "^0.0.9"
 langfuse = "^2.32.0"
-protobuf = "<5.0.0"
 pydantic-settings = "^2.2.1"
 anthropic = "^0.26.1"
-pdfplumber = "^0.11.1"
 sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
 fastapi-users = { version = "*", extras = ["sqlalchemy"] }
 asyncpg = "^0.29.0"
@@ -88,6 +86,11 @@ pytest-asyncio = "^0.21.1"
 coverage = "^7.3.2"
 mypy = "^1.7.1"
 notebook = "^7.1.1"
+deptry = "^0.20.0"
+debugpy = "1.8.2"
+pylint = "^3.0.3"
+ruff = "^0.2.2"
+tweepy = "4.14.0"

 [tool.poetry.group.docs.dependencies]
 mkdocs-material = "^9.5.42"
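The dependency shuffle pins previously floating runtime libraries and moves tooling (debugpy, pylint, ruff, plus the new deptry) into the dev group. One way to keep these warning fixes from regressing, not part of this PR but a plausible follow-up, is a conftest.py that escalates Pydantic's deprecation warnings to errors (assumes Pydantic v2's top-level PydanticDeprecatedSince20 export):

# conftest.py (hypothetical follow-up, not in this PR)
import warnings

from pydantic import PydanticDeprecatedSince20

# Any reintroduced parse_obj/update_forward_refs call now fails the
# test run instead of printing a warning.
warnings.filterwarnings("error", category=PydanticDeprecatedSince20)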