Merge pull request #201 from topoteretes/fix/small-cleanup

Small cleanup pull request
This commit is contained in:
0xideas 2024-11-12 15:45:29 +01:00 committed by GitHub
commit e52d262000
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 11 additions and 6 deletions

View file

@ -1,12 +1,17 @@
from cognee.modules.data.models import Data
from cognee.modules.data.processing.document_types import Document, PdfDocument, AudioDocument, ImageDocument, TextDocument
# Maps a data item's file extension to the Document class used to wrap it.
# Each extension must appear exactly once: a repeated key in a dict literal
# silently overrides the earlier entry, which previously sent "pdf" to
# TextDocument and left PdfDocument unreachable.
EXTENSION_TO_DOCUMENT_CLASS = {
    "pdf": PdfDocument,
    "audio": AudioDocument,
    "image": ImageDocument,
    "txt": TextDocument,
}
def classify_documents(data_documents: list[Data]) -> list[Document]:
documents = [
PdfDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "pdf" else
AudioDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "audio" else
ImageDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "image" else
TextDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)
EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)
for data_item in data_documents
]

View file

@ -18,7 +18,7 @@ from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_eng
from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph
from cognee.tasks.infer_data_ontology.models.models import NodeModel, GraphOntology
from cognee.tasks.graph.models import NodeModel, GraphOntology
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.engine.utils import generate_node_id, generate_node_name

View file

@ -5,7 +5,7 @@ from pydantic import BaseModel
from cognee.modules.data.extraction.extract_summary import extract_summary
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.tasks.storage import add_data_points
from .models.TextSummary import TextSummary
from .models import TextSummary
async def summarize_text(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel]):
if len(data_chunks) == 0: