Added related documentation
This commit is contained in:
parent
667bbd775e
commit
ae740eda96
1 changed files with 20 additions and 0 deletions
|
|
@ -78,6 +78,7 @@ async def add(
|
|||
- S3 path: "s3://my-bucket/documents/file.pdf"
|
||||
- List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
|
||||
- Binary file object: open("file.txt", "rb")
|
||||
- URL: A web link (http or https), e.g. "https://example.com"
|
||||
dataset_name: Name of the dataset to store data in. Defaults to "main_dataset".
|
||||
Create separate datasets to organize different knowledge domains.
|
||||
user: User object for authentication and permissions. Uses default user if None.
|
||||
|
|
@ -88,6 +89,9 @@ async def add(
|
|||
vector_db_config: Optional configuration for vector database (for custom setups).
|
||||
graph_db_config: Optional configuration for graph database (for custom setups).
|
||||
dataset_id: Optional specific dataset UUID to use instead of dataset_name.
|
||||
extraction_rules: Optional dictionary of rules (e.g., CSS selectors, XPath) for extracting specific content from web pages using BeautifulSoup.
|
||||
tavily_config: Optional configuration for the Tavily API, including API key and extraction settings.
|
||||
soup_crawler_config: Optional configuration for BeautifulSoup crawler, specifying concurrency, crawl delay, and extraction rules.
|
||||
|
||||
Returns:
|
||||
PipelineRunInfo: Information about the ingestion pipeline execution including:
|
||||
|
|
@ -136,6 +140,21 @@ async def add(
|
|||
|
||||
# Add a single file
|
||||
await cognee.add("/home/user/documents/analysis.pdf")
|
||||
|
||||
# Add a single URL using the BeautifulSoup (bs4) extraction method
|
||||
extraction_rules = {
|
||||
"title": "h1",
|
||||
"description": "p",
|
||||
"more_info": "a[href*='more-info']"
|
||||
}
|
||||
await cognee.add("https://example.com", extraction_rules=extraction_rules)
|
||||
|
||||
# Add a single URL using the Tavily extraction method
|
||||
# Make sure TAVILY_API_KEY=YOUR_TAVILY_API_KEY is set as an environment variable
|
||||
await cognee.add("https://example.com")
|
||||
|
||||
# Add multiple URLs
|
||||
await cognee.add(["https://example.com", "https://books.toscrape.com"])
|
||||
```
|
||||
|
||||
Environment Variables:
|
||||
|
|
@ -149,6 +168,7 @@ async def add(
|
|||
- DEFAULT_USER_PASSWORD: Custom default user password
|
||||
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
|
||||
- GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
|
||||
- TAVILY_API_KEY: Your Tavily API key (used by the Tavily URL extraction method)
|
||||
|
||||
"""
|
||||
tasks = [
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue