Add metadata_id attribute to Document and DocumentChunk, make ingest_with_metadata default
This commit is contained in:
parent
fd987ed61e
commit
7324564655
4 changed files with 8 additions and 3 deletions
|
|
@@ -2,7 +2,7 @@ from typing import Union, BinaryIO
|
|||
from cognee.modules.users.models import User
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
from cognee.modules.pipelines import run_tasks, Task
|
||||
-from cognee.tasks.ingestion import save_data_to_storage, ingest_data
|
||||
+from cognee.tasks.ingestion import ingest_data_with_metadata
|
||||
from cognee.infrastructure.databases.relational import create_db_and_tables as create_relational_db_and_tables
|
||||
from cognee.infrastructure.databases.vector.pgvector import create_db_and_tables as create_pgvector_db_and_tables
|
||||
|
||||
|
|
@@ -14,8 +14,7 @@ async def add(data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_nam
|
|||
user = await get_default_user()
|
||||
|
||||
tasks = [
|
||||
-Task(save_data_to_storage, dataset_name),
|
||||
-Task(ingest_data, dataset_name, user)
|
||||
+Task(ingest_data_with_metadata, dataset_name, user)
|
||||
]
|
||||
|
||||
pipeline = run_tasks(tasks, data, "add_pipeline")
|
||||
|
|
|
|||
|
|
@@ -35,6 +35,7 @@ class TextChunker():
|
|||
is_part_of = self.document,
|
||||
chunk_index = self.chunk_index,
|
||||
cut_type = chunk_data["cut_type"],
|
||||
+metadata_id = self.document.metadata_id
|
||||
)
|
||||
paragraph_chunks = []
|
||||
self.chunk_size = 0
|
||||
|
|
@@ -48,6 +49,7 @@ class TextChunker():
|
|||
is_part_of = self.document,
|
||||
chunk_index = self.chunk_index,
|
||||
cut_type = paragraph_chunks[len(paragraph_chunks) - 1]["cut_type"],
|
||||
+metadata_id = self.document.metadata_id
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
|
@@ -65,6 +67,7 @@ class TextChunker():
|
|||
is_part_of = self.document,
|
||||
chunk_index = self.chunk_index,
|
||||
cut_type = paragraph_chunks[len(paragraph_chunks) - 1]["cut_type"],
|
||||
+metadata_id = self.document.metadata_id
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
|
|
|||
|
|
@@ -1,9 +1,11 @@
|
|||
from cognee.infrastructure.engine import DataPoint
|
||||
+from uuid import UUID
|
||||
|
||||
class Document(DataPoint):
|
||||
type: str
|
||||
name: str
|
||||
raw_data_location: str
|
||||
+metadata_id: UUID
|
||||
|
||||
def read(self, chunk_size: int) -> str:
|
||||
pass
|
||||
|
|
|
|||
|
|
@@ -45,6 +45,7 @@ def classify_documents(data_documents: list[Data]) -> list[Document]:
|
|||
title=f"{data_item.name}.{data_item.extension}",
|
||||
raw_data_location=data_item.raw_data_location,
|
||||
name=data_item.name,
|
||||
+metadata_id=data_item.metadata_id
|
||||
)
|
||||
for data_item in data_documents
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue