refactor: Rename foreign to external metadata
Rename foreign metadata to external metadata for metadata coming from outside of Cognee
This commit is contained in:
parent
77f0b45a0d
commit
80e67b0619
9 changed files with 14 additions and 14 deletions
|
|
@ -19,7 +19,7 @@ class Data(Base):
|
||||||
raw_data_location = Column(String)
|
raw_data_location = Column(String)
|
||||||
owner_id = Column(UUID, index=True)
|
owner_id = Column(UUID, index=True)
|
||||||
content_hash = Column(String)
|
content_hash = Column(String)
|
||||||
foreign_metadata = Column(JSON)
|
external_metadata = Column(JSON)
|
||||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||||
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
|
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ from cognee.infrastructure.engine import DataPoint
|
||||||
class Document(DataPoint):
|
class Document(DataPoint):
|
||||||
name: str
|
name: str
|
||||||
raw_data_location: str
|
raw_data_location: str
|
||||||
foreign_metadata: Optional[str]
|
external_metadata: Optional[str]
|
||||||
mime_type: str
|
mime_type: str
|
||||||
_metadata: dict = {"index_fields": ["name"], "type": "Document"}
|
_metadata: dict = {"index_fields": ["name"], "type": "Document"}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]:
|
||||||
raw_data_location=data_item.raw_data_location,
|
raw_data_location=data_item.raw_data_location,
|
||||||
name=data_item.name,
|
name=data_item.name,
|
||||||
mime_type=data_item.mime_type,
|
mime_type=data_item.mime_type,
|
||||||
foreign_metadata=json.dumps(data_item.foreign_metadata, indent=4),
|
external_metadata=json.dumps(data_item.external_metadata, indent=4),
|
||||||
)
|
)
|
||||||
documents.append(document)
|
documents.append(document)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ async def ingest_data(data: Any, dataset_name: str, user: User):
|
||||||
destination=destination,
|
destination=destination,
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_foreign_metadata_dict(data_item: Union[BinaryIO, str, Any]) -> dict[str, Any]:
|
def get_external_metadata_dict(data_item: Union[BinaryIO, str, Any]) -> dict[str, Any]:
|
||||||
if hasattr(data_item, "dict") and inspect.ismethod(getattr(data_item, "dict")):
|
if hasattr(data_item, "dict") and inspect.ismethod(getattr(data_item, "dict")):
|
||||||
return {"metadata": data_item.dict(), "origin": str(type(data_item))}
|
return {"metadata": data_item.dict(), "origin": str(type(data_item))}
|
||||||
else:
|
else:
|
||||||
|
|
@ -95,7 +95,7 @@ async def ingest_data(data: Any, dataset_name: str, user: User):
|
||||||
data_point.mime_type = file_metadata["mime_type"]
|
data_point.mime_type = file_metadata["mime_type"]
|
||||||
data_point.owner_id = user.id
|
data_point.owner_id = user.id
|
||||||
data_point.content_hash = file_metadata["content_hash"]
|
data_point.content_hash = file_metadata["content_hash"]
|
||||||
data_point.foreign_metadata = (get_foreign_metadata_dict(data_item),)
|
data_point.external_metadata = (get_external_metadata_dict(data_item),)
|
||||||
await session.merge(data_point)
|
await session.merge(data_point)
|
||||||
else:
|
else:
|
||||||
data_point = Data(
|
data_point = Data(
|
||||||
|
|
@ -106,7 +106,7 @@ async def ingest_data(data: Any, dataset_name: str, user: User):
|
||||||
mime_type=file_metadata["mime_type"],
|
mime_type=file_metadata["mime_type"],
|
||||||
owner_id=user.id,
|
owner_id=user.id,
|
||||||
content_hash=file_metadata["content_hash"],
|
content_hash=file_metadata["content_hash"],
|
||||||
foreign_metadata=get_foreign_metadata_dict(data_item),
|
external_metadata=get_external_metadata_dict(data_item),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check if data is already in dataset
|
# Check if data is already in dataset
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ def test_AudioDocument():
|
||||||
id=uuid.uuid4(),
|
id=uuid.uuid4(),
|
||||||
name="audio-dummy-test",
|
name="audio-dummy-test",
|
||||||
raw_data_location="",
|
raw_data_location="",
|
||||||
foreign_metadata="",
|
external_metadata="",
|
||||||
mime_type="",
|
mime_type="",
|
||||||
)
|
)
|
||||||
with patch.object(AudioDocument, "create_transcript", return_value=TEST_TEXT):
|
with patch.object(AudioDocument, "create_transcript", return_value=TEST_TEXT):
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ def test_ImageDocument():
|
||||||
id=uuid.uuid4(),
|
id=uuid.uuid4(),
|
||||||
name="image-dummy-test",
|
name="image-dummy-test",
|
||||||
raw_data_location="",
|
raw_data_location="",
|
||||||
foreign_metadata="",
|
external_metadata="",
|
||||||
mime_type="",
|
mime_type="",
|
||||||
)
|
)
|
||||||
with patch.object(ImageDocument, "transcribe_image", return_value=TEST_TEXT):
|
with patch.object(ImageDocument, "transcribe_image", return_value=TEST_TEXT):
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ def test_PdfDocument():
|
||||||
id=uuid.uuid4(),
|
id=uuid.uuid4(),
|
||||||
name="Test document.pdf",
|
name="Test document.pdf",
|
||||||
raw_data_location=test_file_path,
|
raw_data_location=test_file_path,
|
||||||
foreign_metadata="",
|
external_metadata="",
|
||||||
mime_type="",
|
mime_type="",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ def test_TextDocument(input_file, chunk_size):
|
||||||
id=uuid.uuid4(),
|
id=uuid.uuid4(),
|
||||||
name=input_file,
|
name=input_file,
|
||||||
raw_data_location=test_file_path,
|
raw_data_location=test_file_path,
|
||||||
foreign_metadata="",
|
external_metadata="",
|
||||||
mime_type="",
|
mime_type="",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ def test_UnstructuredDocument():
|
||||||
id=uuid.uuid4(),
|
id=uuid.uuid4(),
|
||||||
name="example.pptx",
|
name="example.pptx",
|
||||||
raw_data_location=pptx_file_path,
|
raw_data_location=pptx_file_path,
|
||||||
foreign_metadata="",
|
external_metadata="",
|
||||||
mime_type="application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
mime_type="application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -47,7 +47,7 @@ def test_UnstructuredDocument():
|
||||||
id=uuid.uuid4(),
|
id=uuid.uuid4(),
|
||||||
name="example.docx",
|
name="example.docx",
|
||||||
raw_data_location=docx_file_path,
|
raw_data_location=docx_file_path,
|
||||||
foreign_metadata="",
|
external_metadata="",
|
||||||
mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
mime_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -55,7 +55,7 @@ def test_UnstructuredDocument():
|
||||||
id=uuid.uuid4(),
|
id=uuid.uuid4(),
|
||||||
name="example.csv",
|
name="example.csv",
|
||||||
raw_data_location=csv_file_path,
|
raw_data_location=csv_file_path,
|
||||||
foreign_metadata="",
|
external_metadata="",
|
||||||
mime_type="text/csv",
|
mime_type="text/csv",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -63,7 +63,7 @@ def test_UnstructuredDocument():
|
||||||
id=uuid.uuid4(),
|
id=uuid.uuid4(),
|
||||||
name="example.xlsx",
|
name="example.xlsx",
|
||||||
raw_data_location=xlsx_file_path,
|
raw_data_location=xlsx_file_path,
|
||||||
foreign_metadata="",
|
external_metadata="",
|
||||||
mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue