80 lines
3.1 KiB
Python
80 lines
3.1 KiB
Python
import argparse
|
|
import asyncio
|
|
from typing import Optional
|
|
|
|
from cognee.cli.reference import SupportsCliCommand
|
|
from cognee.cli import DEFAULT_DOCS_URL
|
|
import cognee.cli.echo as fmt
|
|
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
|
|
|
|
|
|
class AddCommand(SupportsCliCommand):
|
|
command_string = "add"
|
|
help_string = "Add data to Cognee for knowledge graph processing"
|
|
docs_url = DEFAULT_DOCS_URL
|
|
description = """
|
|
Add data to Cognee for knowledge graph processing.
|
|
|
|
This is the first step in the Cognee workflow - it ingests raw data and prepares it
|
|
for processing. The function accepts various data formats including text, files, and
|
|
binary streams, then stores them in a specified dataset for further processing.
|
|
|
|
Supported Input Types:
|
|
- **Text strings**: Direct text content
|
|
- **File paths**: Local file paths (absolute paths starting with "/")
|
|
- **File URLs**: "file:///absolute/path" or "file://relative/path"
|
|
- **S3 paths**: "s3://bucket-name/path/to/file"
|
|
- **Lists**: Multiple files or text strings in a single call
|
|
|
|
Supported File Formats:
|
|
- Text files (.txt, .md, .csv)
|
|
- PDFs (.pdf)
|
|
- Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
|
|
- Audio files (.mp3, .wav) - transcribed to text
|
|
- Code files (.py, .js, .ts, etc.) - parsed for structure and content
|
|
- Office documents (.docx, .pptx)
|
|
|
|
After adding data, use `cognee cognify` to process it into knowledge graphs.
|
|
"""
|
|
|
|
def configure_parser(self, parser: argparse.ArgumentParser) -> None:
|
|
parser.add_argument(
|
|
"data",
|
|
nargs="+",
|
|
help="Data to add: text content, file paths (/path/to/file), file URLs (file://path), S3 paths (s3://bucket/file), or mix of these",
|
|
)
|
|
parser.add_argument(
|
|
"--dataset-name",
|
|
"-d",
|
|
default="main_dataset",
|
|
help="Dataset name to organize your data (default: main_dataset)",
|
|
)
|
|
|
|
def execute(self, args: argparse.Namespace) -> None:
|
|
try:
|
|
# Import cognee here to avoid circular imports
|
|
import cognee
|
|
|
|
fmt.echo(f"Adding {len(args.data)} item(s) to dataset '{args.dataset_name}'...")
|
|
|
|
# Run the async add function
|
|
async def run_add():
|
|
try:
|
|
# Pass all data items as a list to cognee.add if multiple items
|
|
if len(args.data) == 1:
|
|
data_to_add = args.data[0]
|
|
else:
|
|
data_to_add = args.data
|
|
|
|
fmt.echo("Processing data...")
|
|
await cognee.add(data=data_to_add, dataset_name=args.dataset_name)
|
|
fmt.success(f"Successfully added data to dataset '{args.dataset_name}'")
|
|
except Exception as e:
|
|
raise CliCommandInnerException(f"Failed to add data: {str(e)}") from e
|
|
|
|
asyncio.run(run_add())
|
|
|
|
except Exception as e:
|
|
if isinstance(e, CliCommandInnerException):
|
|
raise CliCommandException(str(e), error_code=1) from e
|
|
raise CliCommandException(f"Failed to add data: {str(e)}", error_code=1) from e
|