Merge latest COG-519
This commit is contained in:
parent
d4e77636b5
commit
bc82430fb5
3 changed files with 15 additions and 10 deletions
|
|
@ -4,6 +4,7 @@ from .guess_file_type import guess_file_type
|
||||||
|
|
||||||
class FileMetadata(TypedDict):
|
class FileMetadata(TypedDict):
|
||||||
name: str
|
name: str
|
||||||
|
file_path: str
|
||||||
mime_type: str
|
mime_type: str
|
||||||
extension: str
|
extension: str
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,14 +4,15 @@ import re
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
from typing import Any, BinaryIO, Union
|
||||||
|
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
from cognee.infrastructure.files.utils.get_file_metadata import FileMetadata
|
||||||
from ..models.Metadata import Metadata
|
from ..models.Metadata import Metadata
|
||||||
|
|
||||||
|
|
||||||
async def write_metadata(data_item: Any, data_id: UUID) -> UUID:
|
async def write_metadata(data_item: Union[BinaryIO, str, Any], data_id: UUID, file_metadata: FileMetadata) -> UUID:
|
||||||
metadata_dict = get_metadata_dict(data_item)
|
metadata_dict = get_metadata_dict(data_item, file_metadata)
|
||||||
db_engine = get_relational_engine()
|
db_engine = get_relational_engine()
|
||||||
async with db_engine.get_async_session() as session:
|
async with db_engine.get_async_session() as session:
|
||||||
metadata = Metadata(
|
metadata = Metadata(
|
||||||
|
|
@ -34,14 +35,18 @@ def parse_type(type_: Any) -> str:
|
||||||
raise Exception(f"type: {type_} could not be parsed")
|
raise Exception(f"type: {type_} could not be parsed")
|
||||||
|
|
||||||
|
|
||||||
def get_metadata_dict(metadata: Any) -> dict[str, Any]:
|
def get_metadata_dict(data_item: Union[BinaryIO, str, Any], file_metadata: FileMetadata) -> dict[str, Any]:
|
||||||
if hasattr(metadata, "dict") and inspect.ismethod(getattr(metadata, "dict")):
|
if isinstance(data_item, str):
|
||||||
return metadata.dict()
|
return(file_metadata)
|
||||||
|
elif isinstance(data_item, BinaryIO):
|
||||||
|
return(file_metadata)
|
||||||
|
elif hasattr(data_item, "dict") and inspect.ismethod(getattr(data_item, "dict")):
|
||||||
|
return {**file_metadata, **data_item.dict()}
|
||||||
else:
|
else:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
f"metadata of type {type(metadata)}: {str(metadata)[:20]}... does not have dict method. Defaulting to string method"
|
f"metadata of type {type(data_item)}: {str(data_item)[:20]}... does not have dict method. Defaulting to string method"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
return {"content": str(metadata)}
|
return {**dict(file_metadata), "content": str(data_item)}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception(f"Could not cast metadata to string: {e}")
|
raise Exception(f"Could not cast metadata to string: {e}")
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import dlt
|
import dlt
|
||||||
|
|
||||||
import cognee.modules.ingestion as ingestion
|
import cognee.modules.ingestion as ingestion
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
from cognee.modules.data.methods import create_dataset
|
from cognee.modules.data.methods import create_dataset
|
||||||
|
|
@ -76,7 +75,7 @@ async def ingest_data_with_metadata(data: Any, dataset_name: str, user: User):
|
||||||
|
|
||||||
dataset.data.append(data_point)
|
dataset.data.append(data_point)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
await write_metadata(data_item, data_point.id)
|
await write_metadata(data_item, data_point.id, file_metadata)
|
||||||
|
|
||||||
|
|
||||||
yield {
|
yield {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue