Get metadata from metadata table

This commit is contained in:
Leon Luithlen 2024-11-27 16:27:55 +01:00
parent cd0e505ac0
commit 15802237e9
3 changed files with 8 additions and 3 deletions

View file

@ -14,4 +14,6 @@ async def get_metadata(metadata_id: UUID) -> Metadata:
async with db_engine.get_async_session() as session:
metadata = await session.get(Metadata, metadata_id)
return json.parse(metadata)
return metadata

View file

@ -15,6 +15,7 @@ async def write_metadata(data_item: Any, data_id: UUID) -> UUID:
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
metadata = Metadata(
id=data_id,
metadata_repr=json.dumps(metadata_dict),
metadata_source=parse_type(type(data_item)),
data_id=data_id

View file

@ -6,6 +6,7 @@ from cognee.modules.data.processing.document_types import (
ImageDocument,
TextDocument,
)
from cognee.modules.data.operations.get_metadata import get_metadata
EXTENSION_TO_DOCUMENT_CLASS = {
"pdf": PdfDocument, # Text documents
@ -38,15 +39,16 @@ EXTENSION_TO_DOCUMENT_CLASS = {
}
def classify_documents(data_documents: list[Data]) -> list[Document]:
async def classify_documents(data_documents: list[Data]) -> list[Document]:
documents = []
for data_item in data_documents:
metadata = await get_metadata(data_item.id)
document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](
id=data_item.id,
title=f"{data_item.name}.{data_item.extension}",
raw_data_location=data_item.raw_data_location,
name=data_item.name,
metadata_id=data_item.metadata_id
metadata_id=metadata.id
)
documents.append(document)