Merge latest COG-519

This commit is contained in:
Leon Luithlen 2024-11-29 14:36:03 +01:00
parent d4e77636b5
commit bc82430fb5
3 changed files with 15 additions and 10 deletions

View file

@ -4,6 +4,7 @@ from .guess_file_type import guess_file_type
class FileMetadata(TypedDict): class FileMetadata(TypedDict):
name: str name: str
file_path: str
mime_type: str mime_type: str
extension: str extension: str

View file

@ -4,14 +4,15 @@ import re
import warnings import warnings
from typing import Any from typing import Any
from uuid import UUID from uuid import UUID
from typing import Any, BinaryIO, Union
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.infrastructure.files.utils.get_file_metadata import FileMetadata
from ..models.Metadata import Metadata from ..models.Metadata import Metadata
async def write_metadata(data_item: Any, data_id: UUID) -> UUID: async def write_metadata(data_item: Union[BinaryIO, str, Any], data_id: UUID, file_metadata: FileMetadata) -> UUID:
metadata_dict = get_metadata_dict(data_item) metadata_dict = get_metadata_dict(data_item, file_metadata)
db_engine = get_relational_engine() db_engine = get_relational_engine()
async with db_engine.get_async_session() as session: async with db_engine.get_async_session() as session:
metadata = Metadata( metadata = Metadata(
@ -34,14 +35,18 @@ def parse_type(type_: Any) -> str:
raise Exception(f"type: {type_} could not be parsed") raise Exception(f"type: {type_} could not be parsed")
def get_metadata_dict(metadata: Any) -> dict[str, Any]: def get_metadata_dict(data_item: Union[BinaryIO, str, Any], file_metadata: FileMetadata) -> dict[str, Any]:
if hasattr(metadata, "dict") and inspect.ismethod(getattr(metadata, "dict")): if isinstance(data_item, str):
return metadata.dict() return(file_metadata)
elif isinstance(data_item, BinaryIO):
return(file_metadata)
elif hasattr(data_item, "dict") and inspect.ismethod(getattr(data_item, "dict")):
return {**file_metadata, **data_item.dict()}
else: else:
warnings.warn( warnings.warn(
f"metadata of type {type(metadata)}: {str(metadata)[:20]}... does not have dict method. Defaulting to string method" f"metadata of type {type(data_item)}: {str(data_item)[:20]}... does not have dict method. Defaulting to string method"
) )
try: try:
return {"content": str(metadata)} return {**dict(file_metadata), "content": str(data_item)}
except Exception as e: except Exception as e:
raise Exception(f"Could not cast metadata to string: {e}") raise Exception(f"Could not cast metadata to string: {e}")

View file

@ -1,7 +1,6 @@
from typing import Any from typing import Any
import dlt import dlt
import cognee.modules.ingestion as ingestion import cognee.modules.ingestion as ingestion
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.methods import create_dataset from cognee.modules.data.methods import create_dataset
@ -76,7 +75,7 @@ async def ingest_data_with_metadata(data: Any, dataset_name: str, user: User):
dataset.data.append(data_point) dataset.data.append(data_point)
await session.commit() await session.commit()
await write_metadata(data_item, data_point.id) await write_metadata(data_item, data_point.id, file_metadata)
yield { yield {