fix: remove obsolete files and fix unit tests
parent 340a61b20a
commit 66427e725c

17 changed files with 40 additions and 113 deletions
@@ -1,7 +1,11 @@
import io
from typing import BinaryIO, TypedDict
from .guess_file_type import guess_file_type

from cognee.shared.logging_utils import get_logger
from cognee.shared.utils import get_file_content_hash
from .guess_file_type import guess_file_type

logger = get_logger("FileMetadata")


class FileMetadata(TypedDict):

@@ -44,9 +48,7 @@ def get_file_metadata(file: BinaryIO) -> FileMetadata:
        content_hash = get_file_content_hash(file)
        file.seek(0)
    except io.UnsupportedOperation as error:
        raise Exception(
            f"Error retrieving metadata from file: {file.name} \n{str(error)}\n\n"
        ) from error
        logger.error(f"Error retrieving content hash for file: {file.name} \n{str(error)}\n\n")

    file_type = guess_file_type(file)

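The hunk above switches between raising and logging when the content hash cannot be computed. A minimal standalone sketch of the log-and-continue variant, using a hypothetical read_content_hash helper rather than cognee's actual get_file_content_hash:

import hashlib
import io
import logging
from typing import BinaryIO, Optional

logger = logging.getLogger("FileMetadata")


def read_content_hash(file: BinaryIO) -> Optional[str]:
    # Hypothetical helper: hash the stream if it is readable and seekable,
    # otherwise log the failure and return None instead of raising.
    try:
        content_hash = hashlib.md5(file.read()).hexdigest()
        file.seek(0)
        return content_hash
    except io.UnsupportedOperation as error:
        logger.error(f"Error retrieving content hash for file: {getattr(file, 'name', '<unknown>')} \n{str(error)}\n\n")
        return None

With this shape, callers can simply leave the hash out of the metadata when None comes back.
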
@@ -1,25 +0,0 @@
import asyncio
import threading


def run_sync(coro, timeout=None):
    result = None
    exception = None

    def runner():
        nonlocal result, exception
        try:
            result = asyncio.run(coro)
        except Exception as e:
            exception = e

    thread = threading.Thread(target=runner)
    thread.start()
    thread.join(timeout)

    if thread.is_alive():
        raise asyncio.TimeoutError("Coroutine execution timed out.")
    if exception:
        raise exception

    return result

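The deleted run_sync helper ran a coroutine to completion on a worker thread with an optional timeout. A minimal sketch of what a call site can do instead, assuming no event loop is already running in the calling thread:

import asyncio


async def fetch_value() -> int:
    # Hypothetical coroutine standing in for whatever callers passed to run_sync.
    await asyncio.sleep(0.1)
    return 42


# asyncio.run creates a fresh event loop, runs the coroutine, and closes the loop.
result = asyncio.run(fetch_value())

# A timeout equivalent to run_sync(coro, timeout=5) can be expressed with wait_for.
result_with_timeout = asyncio.run(asyncio.wait_for(fetch_value(), timeout=5))
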
@@ -47,18 +47,11 @@ def expand_with_nodes_and_edges(
        type_node_key = f"{type_node_id}_type"

        if type_node_key not in added_nodes_map and type_node_key not in key_mapping:
            if ontology_resolver:
                (
                    ontology_entity_type_nodes,
                    ontology_entity_type_edges,
                    ontology_closest_class_node,
                ) = ontology_resolver.get_subgraph(
                    node_name=type_node_name, node_type="classes"
                )
            else:
                ontology_entity_type_nodes = []
                ontology_entity_type_edges = []
                ontology_closest_class_node = None
            (
                ontology_entity_type_nodes,
                ontology_entity_type_edges,
                ontology_closest_class_node,
            ) = ontology_resolver.get_subgraph(node_name=type_node_name, node_type="classes")

            if ontology_closest_class_node:
                name_mapping[type_node_name] = ontology_closest_class_node.name

@@ -135,14 +128,9 @@ def expand_with_nodes_and_edges(
        entity_node_key = f"{node_id}_entity"

        if entity_node_key not in added_nodes_map and entity_node_key not in key_mapping:
            if ontology_resolver:
                ontology_entity_nodes, ontology_entity_edges, start_ent_ont = (
                    ontology_resolver.get_subgraph(node_name=node_name, node_type="individuals")
                )
            else:
                ontology_entity_nodes = []
                ontology_entity_edges = []
                start_ent_ont = None
            ontology_entity_nodes, ontology_entity_edges, start_ent_ont = (
                ontology_resolver.get_subgraph(node_name=node_name, node_type="individuals")
            )

            if start_ent_ont:
                name_mapping[node_name] = start_ent_ont.name

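Both hunks above toggle between an "if ontology_resolver" guard and an unconditional get_subgraph call. A hypothetical helper that keeps that fallback in one place, sketched only to illustrate the pattern and not taken from the repository:

from typing import Any, List, Optional, Tuple


def resolve_subgraph(
    resolver: Optional[Any], node_name: str, node_type: str
) -> Tuple[List[Any], List[Any], Optional[Any]]:
    # When no ontology resolver is configured, return an empty subgraph
    # (no nodes, no edges, no closest match) instead of calling None.
    if resolver is None:
        return [], [], None
    return resolver.get_subgraph(node_name=node_name, node_type=node_type)
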
@@ -1,6 +0,0 @@
import hashlib


def get_text_content_hash(text: str) -> str:
    encoded_text = text.encode("utf-8")
    return hashlib.md5(encoded_text).hexdigest()

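The deleted get_text_content_hash helper was a thin wrapper over hashlib; any remaining caller can compute the same digest inline, as in this sketch:

import hashlib

# MD5 hex digest of the UTF-8 encoded text, matching the removed helper.
text_content_hash = hashlib.md5("example text".encode("utf-8")).hexdigest()
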
@@ -85,29 +85,6 @@ async def run_tasks_distributed(tasks, dataset_id, data, user, pipeline_name, co
        payload=result,
    )

    # producer_futures = []

    # for data_item in data[:5]:
    # producer_future = run_tasks_distributed(
    # run_tasks_with_telemetry, tasks, [data_item], user, pipeline_name, context
    # )
    # producer_futures.append(producer_future)

    # batch_results = []
    # for producer_future in producer_futures:
    # try:
    # result = producer_future.get()
    # except Exception as e:
    # result = e
    # batch_results.append(result)

    # yield PipelineRunYield(
    # pipeline_run_id=pipeline_run_id,
    # dataset_id=dataset.id,
    # dataset_name=dataset.name,
    # payload=result,
    # )

    await log_pipeline_run_complete(pipeline_run_id, pipeline_id, pipeline_name, dataset_id, data)

    yield PipelineRunCompleted(

@@ -1,9 +1,11 @@
import asyncio
import random
import time
import random
import pytest
import asyncio
from typing import List
from uuid import NAMESPACE_OID, uuid5


from cognee.infrastructure.engine import DataPoint
from cognee.modules.graph.utils import get_graph_from_model

@@ -55,6 +57,7 @@ def nanoseconds_to_largest_unit(nanoseconds):
    return nanoseconds, "nanoseconds"


@pytest.mark.asyncio
async def test_circular_reference_extraction():
    repo = Repository(path="repo1")

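The test hunks above add pytest and @pytest.mark.asyncio to functions that were previously plain async callables. A minimal self-contained version of that pattern, assuming pytest-asyncio is installed:

import asyncio

import pytest


@pytest.mark.asyncio
async def test_example():
    # pytest-asyncio drives the coroutine on an event loop it manages,
    # so the test needs no explicit asyncio.run(...) wrapper.
    await asyncio.sleep(0)
    assert True
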
@@ -1,7 +1,9 @@
import pytest
import asyncio
from typing import List
from uuid import NAMESPACE_OID, uuid5


from cognee.infrastructure.engine import DataPoint
from cognee.modules.graph.utils import get_graph_from_model

@@ -32,6 +34,7 @@ class Entity(DataPoint):
DocumentChunk.model_rebuild()


@pytest.mark.asyncio
async def get_graph_from_model_test():
    document = Document(path="file_path")

@@ -1,8 +1,9 @@
import pytest
import asyncio
import random
from typing import List
from uuid import NAMESPACE_OID, uuid5


from cognee.infrastructure.engine import DataPoint
from cognee.modules.graph.utils import get_graph_from_model

@@ -33,6 +34,7 @@ class Entity(DataPoint):
DocumentChunk.model_rebuild()


@pytest.mark.asyncio
async def get_graph_from_model_test():
    document = Document(path="file_path")

@@ -1,5 +1,5 @@
import pytest
from typing import List, Optional
from typing import List
from cognee.infrastructure.engine import DataPoint

from cognee.modules.graph.utils import get_graph_from_model

@@ -2,7 +2,7 @@ import asyncio
from queue import Queue

import cognee
from cognee.modules.pipelines.operations.run_tasks import run_tasks_base
from cognee.modules.pipelines.operations.run_tasks_base import run_tasks_base
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user
from cognee.infrastructure.databases.relational import create_db_and_tables

@@ -1,7 +1,7 @@
import asyncio

import cognee
from cognee.modules.pipelines.operations.run_tasks import run_tasks_base
from cognee.modules.pipelines.operations.run_tasks_base import run_tasks_base
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user
from cognee.infrastructure.databases.relational import create_db_and_tables

@@ -3,7 +3,7 @@ import asyncio
import cognee
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user
from cognee.modules.pipelines.operations.run_tasks import run_tasks_base
from cognee.modules.pipelines.operations.run_tasks_base import run_tasks_base
from cognee.infrastructure.databases.relational import create_db_and_tables

@@ -1,5 +1,5 @@
import asyncio
import os
import asyncio

import cognee
from cognee.api.v1.prune import prune

@@ -45,10 +45,10 @@ async def main():
    worker_future = data_point_saving_worker.spawn()
    consumer_futures.append(worker_future)

    # s3_bucket_name = "s3://s3-test-laszlo/Database for KG v1"
    s3_bucket_name = "s3://s3-test-laszlo/Pdf"
    s3_bucket_path = os.getenv("S3_BUCKET_PATH")
    s3_data_path = "s3://" + s3_bucket_path

    await cognee.add(s3_bucket_name, dataset_name="s3-files")
    await cognee.add(s3_data_path, dataset_name="s3-files")

    await cognee.cognify(datasets=["s3-files"])

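The hunk above replaces a hard-coded bucket with a path built from the S3_BUCKET_PATH environment variable. A hedged sketch of the same lookup with an explicit guard, since os.getenv returns None when the variable is unset:

import os

s3_bucket_path = os.getenv("S3_BUCKET_PATH")
if not s3_bucket_path:
    # Fail early with a clear message instead of concatenating "s3://" with None.
    raise RuntimeError("S3_BUCKET_PATH environment variable is not set")

s3_data_path = "s3://" + s3_bucket_path
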
@@ -1,22 +1,11 @@
import json
import pathlib
from os import path
from modal import Image
from logging import getLogger
from dotenv import dotenv_values

logger = getLogger("modal_image_creation")

local_env_vars = dict(dotenv_values(".env"))

logger.debug("Modal deployment started with the following environmental variables:")
logger.debug(json.dumps(local_env_vars, indent=4))

image = (
    Image.from_dockerfile(
        path=pathlib.Path(path.join(path.dirname(__file__), "Dockerfile")).resolve(),
        force_build=False,
    )
    .env(local_env_vars)
    .add_local_python_source("cognee", "entrypoint")
)
image = Image.from_dockerfile(
    path=pathlib.Path(path.join(path.dirname(__file__), "Dockerfile")).resolve(),
    force_build=False,
).add_local_python_source("cognee", "entrypoint")

@@ -1,7 +1,5 @@
from grpclib import GRPCError


async def queued_add_data_points(collection_name, data_points_batch):
    from grpclib import GRPCError
    from ..queues import add_data_points_queue

    try:

@@ -1,7 +1,5 @@
from grpclib import GRPCError


async def queued_add_edges(edge_batch):
    from grpclib import GRPCError
    from ..queues import add_nodes_and_edges_queue

    try:

@@ -1,7 +1,5 @@
from grpclib import GRPCError


async def queued_add_nodes(node_batch):
    from grpclib import GRPCError
    from ..queues import add_nodes_and_edges_queue

    try:

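The three queued_add_* hunks above show the GRPCError import both at module level and inside the function bodies. A sketch of the deferred-import variant, with a hypothetical queued_send helper and assuming grpclib is installed when the function is actually called:

from typing import Any, Awaitable, Callable


async def queued_send(send: Callable[[Any], Awaitable[None]], batch: Any) -> bool:
    # Importing here instead of at module level defers the grpclib dependency
    # to call time, so importing this module never requires grpclib.
    from grpclib import GRPCError

    try:
        await send(batch)
        return True
    except GRPCError:
        # Let the caller decide whether to retry or drop the batch.
        return False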