From 15802237e973711db1f08b5fbd831c24360e0c2b Mon Sep 17 00:00:00 2001 From: Leon Luithlen Date: Wed, 27 Nov 2024 16:27:55 +0100 Subject: [PATCH] Get metadata from metadata table --- cognee/modules/data/operations/get_metadata.py | 4 +++- cognee/modules/data/operations/write_metadata.py | 1 + cognee/tasks/documents/classify_documents.py | 6 ++++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/cognee/modules/data/operations/get_metadata.py b/cognee/modules/data/operations/get_metadata.py index 9034b327d..26637e383 100644 --- a/cognee/modules/data/operations/get_metadata.py +++ b/cognee/modules/data/operations/get_metadata.py @@ -14,4 +14,6 @@ async def get_metadata(metadata_id: UUID) -> Metadata: async with db_engine.get_async_session() as session: metadata = await session.get(Metadata, metadata_id) - return json.parse(metadata) + return metadata + + diff --git a/cognee/modules/data/operations/write_metadata.py b/cognee/modules/data/operations/write_metadata.py index 749aed831..4b550a6bf 100644 --- a/cognee/modules/data/operations/write_metadata.py +++ b/cognee/modules/data/operations/write_metadata.py @@ -15,6 +15,7 @@ async def write_metadata(data_item: Any, data_id: UUID) -> UUID: db_engine = get_relational_engine() async with db_engine.get_async_session() as session: metadata = Metadata( + id=data_id, metadata_repr=json.dumps(metadata_dict), metadata_source=parse_type(type(data_item)), data_id=data_id diff --git a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py index 79ad8245f..41ffc45bd 100644 --- a/cognee/tasks/documents/classify_documents.py +++ b/cognee/tasks/documents/classify_documents.py @@ -6,6 +6,7 @@ from cognee.modules.data.processing.document_types import ( ImageDocument, TextDocument, ) +from cognee.modules.data.operations.get_metadata import get_metadata EXTENSION_TO_DOCUMENT_CLASS = { "pdf": PdfDocument, # Text documents @@ -38,15 +39,16 @@ EXTENSION_TO_DOCUMENT_CLASS = { } -def classify_documents(data_documents: list[Data]) -> list[Document]: +async def classify_documents(data_documents: list[Data]) -> list[Document]: documents = [] for data_item in data_documents: + metadata = await get_metadata(data_item.id) document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension]( id=data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location, name=data_item.name, - metadata_id=data_item.metadata_id + metadata_id=metadata.id ) documents.append(document)