Merge latest COG-519
This commit is contained in:
parent
d4e77636b5
commit
bc82430fb5
3 changed files with 15 additions and 10 deletions
|
|
@ -4,6 +4,7 @@ from .guess_file_type import guess_file_type
|
|||
|
||||
class FileMetadata(TypedDict):
|
||||
name: str
|
||||
file_path: str
|
||||
mime_type: str
|
||||
extension: str
|
||||
|
||||
|
|
|
|||
|
|
@ -4,14 +4,15 @@ import re
|
|||
import warnings
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
from typing import Any, BinaryIO, Union
|
||||
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
|
||||
from cognee.infrastructure.files.utils.get_file_metadata import FileMetadata
|
||||
from ..models.Metadata import Metadata
|
||||
|
||||
|
||||
async def write_metadata(data_item: Any, data_id: UUID) -> UUID:
|
||||
metadata_dict = get_metadata_dict(data_item)
|
||||
async def write_metadata(data_item: Union[BinaryIO, str, Any], data_id: UUID, file_metadata: FileMetadata) -> UUID:
|
||||
metadata_dict = get_metadata_dict(data_item, file_metadata)
|
||||
db_engine = get_relational_engine()
|
||||
async with db_engine.get_async_session() as session:
|
||||
metadata = Metadata(
|
||||
|
|
@ -34,14 +35,18 @@ def parse_type(type_: Any) -> str:
|
|||
raise Exception(f"type: {type_} could not be parsed")
|
||||
|
||||
|
||||
def get_metadata_dict(metadata: Any) -> dict[str, Any]:
|
||||
if hasattr(metadata, "dict") and inspect.ismethod(getattr(metadata, "dict")):
|
||||
return metadata.dict()
|
||||
def get_metadata_dict(data_item: Union[BinaryIO, str, Any], file_metadata: FileMetadata) -> dict[str, Any]:
|
||||
if isinstance(data_item, str):
|
||||
return(file_metadata)
|
||||
elif isinstance(data_item, BinaryIO):
|
||||
return(file_metadata)
|
||||
elif hasattr(data_item, "dict") and inspect.ismethod(getattr(data_item, "dict")):
|
||||
return {**file_metadata, **data_item.dict()}
|
||||
else:
|
||||
warnings.warn(
|
||||
f"metadata of type {type(metadata)}: {str(metadata)[:20]}... does not have dict method. Defaulting to string method"
|
||||
f"metadata of type {type(data_item)}: {str(data_item)[:20]}... does not have dict method. Defaulting to string method"
|
||||
)
|
||||
try:
|
||||
return {"content": str(metadata)}
|
||||
return {**dict(file_metadata), "content": str(data_item)}
|
||||
except Exception as e:
|
||||
raise Exception(f"Could not cast metadata to string: {e}")
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
from typing import Any
|
||||
|
||||
import dlt
|
||||
|
||||
import cognee.modules.ingestion as ingestion
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.modules.data.methods import create_dataset
|
||||
|
|
@ -76,7 +75,7 @@ async def ingest_data_with_metadata(data: Any, dataset_name: str, user: User):
|
|||
|
||||
dataset.data.append(data_point)
|
||||
await session.commit()
|
||||
await write_metadata(data_item, data_point.id)
|
||||
await write_metadata(data_item, data_point.id, file_metadata)
|
||||
|
||||
|
||||
yield {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue