cognee/infrastructure/loaders/models/LoaderResult.py
2025-07-13 20:24:52 +02:00

47 lines
1.5 KiB
Python

from pydantic import BaseModel
from typing import Optional, Dict, Any, List
from enum import Enum
class ContentType(Enum):
    """Coarse classification of the content a loader produced.

    Each member's value is the lowercase string used when the
    classification is serialized.
    """

    # Plain textual content.
    TEXT = "text"
    # Content that carries structure.
    STRUCTURED = "structured"
    # Binary content.
    BINARY = "binary"
class LoaderResult(BaseModel):
    """
    Standardized output format for all file loaders.

    This model ensures consistent data structure across all loader
    implementations, following cognee's pattern of using Pydantic models
    for data validation.

    Note:
        ``Config.use_enum_values`` is enabled, so after validation
        ``content_type`` holds the enum's string value (e.g. ``"text"``)
        rather than a ``ContentType`` member.
    """

    content: str  # Primary text content extracted from file
    metadata: Dict[str, Any]  # File metadata (name, size, type, loader info, etc.)
    content_type: ContentType  # Content classification
    chunks: Optional[List[str]] = None  # Pre-chunked content if available
    source_info: Optional[Dict[str, Any]] = None  # Source-specific information

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert the loader result to a dictionary format.

        Returns:
            Dict with the keys ``content``, ``metadata``, ``content_type``,
            ``source_info`` and ``chunks``. ``content_type`` is always the
            string value of the classification, ``source_info`` falls back
            to an empty dict when it is unset or empty, and ``chunks`` is
            passed through unchanged (it may be ``None``).
        """
        # With `use_enum_values = True` the validated field already holds the
        # plain string value, so calling `.value` on it unconditionally raises
        # AttributeError. A ContentType member can still be present when
        # validation was bypassed, so both representations are handled.
        content_type = self.content_type
        if isinstance(content_type, Enum):
            content_type = content_type.value

        return {
            "content": self.content,
            "metadata": self.metadata,
            "content_type": content_type,
            "source_info": self.source_info or {},
            "chunks": self.chunks,
        }

    class Config:
        """Pydantic configuration following cognee patterns"""

        # Store enum fields as their underlying values instead of enum members.
        use_enum_values = True
        # Re-run field validation whenever an attribute is assigned.
        validate_assignment = True