diff --git a/cognee/modules/data/models/Data.py b/cognee/modules/data/models/Data.py index 285a4054c..0c0d60d0d 100644 --- a/cognee/modules/data/models/Data.py +++ b/cognee/modules/data/models/Data.py @@ -19,7 +19,7 @@ class Data(Base): raw_data_location = Column(String) owner_id = Column(UUID, index=True) content_hash = Column(String) - foreign_metadata = Column(JSON) + external_metadata = Column(JSON) created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) diff --git a/cognee/modules/data/processing/document_types/Document.py b/cognee/modules/data/processing/document_types/Document.py index e1bbb70ba..4d9f3bf72 100644 --- a/cognee/modules/data/processing/document_types/Document.py +++ b/cognee/modules/data/processing/document_types/Document.py @@ -7,7 +7,7 @@ from cognee.infrastructure.engine import DataPoint class Document(DataPoint): name: str raw_data_location: str - foreign_metadata: Optional[str] + external_metadata: Optional[str] mime_type: str _metadata: dict = {"index_fields": ["name"], "type": "Document"} diff --git a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py index b1cc6a71d..8aa147b00 100644 --- a/cognee/tasks/documents/classify_documents.py +++ b/cognee/tasks/documents/classify_documents.py @@ -58,7 +58,7 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]: raw_data_location=data_item.raw_data_location, name=data_item.name, mime_type=data_item.mime_type, - foreign_metadata=json.dumps(data_item.foreign_metadata, indent=4), + external_metadata=json.dumps(data_item.external_metadata, indent=4), ) documents.append(document) diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index 7b63f7b3f..924ef10b0 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -26,7 +26,7 @@ async def ingest_data(data: Any, dataset_name: str, user: User): destination=destination, ) - def get_foreign_metadata_dict(data_item: Union[BinaryIO, str, Any]) -> dict[str, Any]: + def get_external_metadata_dict(data_item: Union[BinaryIO, str, Any]) -> dict[str, Any]: if hasattr(data_item, "dict") and inspect.ismethod(getattr(data_item, "dict")): return {"metadata": data_item.dict(), "origin": str(type(data_item))} else: @@ -95,7 +95,7 @@ async def ingest_data(data: Any, dataset_name: str, user: User): data_point.mime_type = file_metadata["mime_type"] data_point.owner_id = user.id data_point.content_hash = file_metadata["content_hash"] - data_point.foreign_metadata = (get_foreign_metadata_dict(data_item),) + data_point.external_metadata = (get_external_metadata_dict(data_item),) await session.merge(data_point) else: data_point = Data( @@ -106,7 +106,7 @@ async def ingest_data(data: Any, dataset_name: str, user: User): mime_type=file_metadata["mime_type"], owner_id=user.id, content_hash=file_metadata["content_hash"], - foreign_metadata=get_foreign_metadata_dict(data_item), + external_metadata=get_external_metadata_dict(data_item), ) # Check if data is already in dataset diff --git a/cognee/tests/integration/documents/AudioDocument_test.py b/cognee/tests/integration/documents/AudioDocument_test.py index c079e2122..9719d90fc 100644 --- a/cognee/tests/integration/documents/AudioDocument_test.py +++ b/cognee/tests/integration/documents/AudioDocument_test.py @@ -29,7 +29,7 @@ def test_AudioDocument(): id=uuid.uuid4(), name="audio-dummy-test", raw_data_location="", - foreign_metadata="", + external_metadata="", mime_type="", ) with patch.object(AudioDocument, "create_transcript", return_value=TEST_TEXT): diff --git a/cognee/tests/integration/documents/ImageDocument_test.py b/cognee/tests/integration/documents/ImageDocument_test.py index c7e3ee82d..bd15961ee 100644 --- a/cognee/tests/integration/documents/ImageDocument_test.py +++ b/cognee/tests/integration/documents/ImageDocument_test.py @@ -18,7 +18,7 @@ def test_ImageDocument(): id=uuid.uuid4(), name="image-dummy-test", raw_data_location="", - foreign_metadata="", + external_metadata="", mime_type="", ) with patch.object(ImageDocument, "transcribe_image", return_value=TEST_TEXT): diff --git a/cognee/tests/integration/documents/PdfDocument_test.py b/cognee/tests/integration/documents/PdfDocument_test.py index c06e6aa1a..82d304b6c 100644 --- a/cognee/tests/integration/documents/PdfDocument_test.py +++ b/cognee/tests/integration/documents/PdfDocument_test.py @@ -20,7 +20,7 @@ def test_PdfDocument(): id=uuid.uuid4(), name="Test document.pdf", raw_data_location=test_file_path, - foreign_metadata="", + external_metadata="", mime_type="", ) diff --git a/cognee/tests/integration/documents/TextDocument_test.py b/cognee/tests/integration/documents/TextDocument_test.py index 6c0c42880..17db39be8 100644 --- a/cognee/tests/integration/documents/TextDocument_test.py +++ b/cognee/tests/integration/documents/TextDocument_test.py @@ -32,7 +32,7 @@ def test_TextDocument(input_file, chunk_size): id=uuid.uuid4(), name=input_file, raw_data_location=test_file_path, - foreign_metadata="", + external_metadata="", mime_type="", ) diff --git a/cognee/tests/integration/documents/UnstructuredDocument_test.py b/cognee/tests/integration/documents/UnstructuredDocument_test.py index a619b745e..81e804f07 100644 --- a/cognee/tests/integration/documents/UnstructuredDocument_test.py +++ b/cognee/tests/integration/documents/UnstructuredDocument_test.py @@ -39,7 +39,7 @@ def test_UnstructuredDocument(): id=uuid.uuid4(), name="example.pptx", raw_data_location=pptx_file_path, - foreign_metadata="", + external_metadata="", mime_type="application/vnd.openxmlformats-officedocument.presentationml.presentation", ) @@ -47,7 +47,7 @@ def test_UnstructuredDocument(): id=uuid.uuid4(), name="example.docx", raw_data_location=docx_file_path, - foreign_metadata="", + external_metadata="", mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", ) @@ -55,7 +55,7 @@ def test_UnstructuredDocument(): id=uuid.uuid4(), name="example.csv", raw_data_location=csv_file_path, - foreign_metadata="", + external_metadata="", mime_type="text/csv", ) @@ -63,7 +63,7 @@ def test_UnstructuredDocument(): id=uuid.uuid4(), name="example.xlsx", raw_data_location=xlsx_file_path, - foreign_metadata="", + external_metadata="", mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", )