Secure api v2 (#1050)
<!-- .github/pull_request_template.md -->

## Description

Modify endpoints to allow better security for different infrastructure needs and setups.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
3c3c89a140
commit
cb45897d7d
6 changed files with 211 additions and 202 deletions
.env.template (312 changes)

@@ -1,189 +1,112 @@
###############################################################################
# NOTE: With default settings Cognee only needs an OpenAI LLM_API_KEY to be set.
# The rest of the settings don't have to be set.
# Default relational database: SQLite
# Default vector database    : LanceDB
# Default graph database     : Kuzu
#
# These default databases are all file-based, so no extra setup is needed
# for local use.
###############################################################################

###
### DEV
###

TOKENIZERS_PARALLELISM="false"

###
### LLM
###

###
### simple, "expensive", an OpenAI key
###
################################################################################
# 🧠 LLM Settings
################################################################################

LLM_API_KEY="your_api_key"

###
### DEV LLM, cheap, with content filters
###

LLM_MODEL="azure/gpt-4o-mini"
LLM_ENDPOINT="https://DNS.azure.com/openai/deployments/gpt-4o-mini"
LLM_API_KEY="<<TALK TO YOUR AZURE GUY"
LLM_API_VERSION="2024-12-01-preview"
# llm api version might not be relevant
LLM_MODEL="openai/gpt-4o-mini"
LLM_PROVIDER="openai"
LLM_ENDPOINT=""
LLM_API_VERSION=""
LLM_MAX_TOKENS="16384"

EMBEDDING_MODEL="azure/text-embedding-3-large"
EMBEDDING_ENDPOINT="https://DNS.openai.azure.com/openai/deployments/text-embedding-3-large"
EMBEDDING_API_KEY="<<TALK TO YOUR AZURE GUY>"
EMBEDDING_API_VERSION="2024-12-01-preview"
EMBEDDING_DIMENSIONS=3072
EMBEDDING_MAX_TOKENS=8191

###
### free local LLM, install it
###

LLM_API_KEY="ollama"
LLM_MODEL="llama3.1:8b"
LLM_PROVIDER="ollama"
LLM_ENDPOINT="http://localhost:11434/v1"
EMBEDDING_PROVIDER="ollama"
EMBEDDING_MODEL="avr/sfr-embedding-mistral:latest"
EMBEDDING_ENDPOINT="http://localhost:11434/api/embeddings"
EMBEDDING_DIMENSIONS=4096
HUGGINGFACE_TOKENIZER="Salesforce/SFR-Embedding-Mistral"

###
### openrouter, also free
###

LLM_API_KEY="<<go-get-one-yourself"
LLM_PROVIDER="custom"
LLM_MODEL="openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
LLM_ENDPOINT="https://openrouter.ai/api/v1"

###
### deepinfra
###

LLM_API_KEY="<<>>"
LLM_PROVIDER="custom"
LLM_MODEL="deepinfra/meta-llama/Meta-Llama-3-8B-Instruct"
LLM_ENDPOINT="https://api.deepinfra.com/v1/openai"

EMBEDDING_PROVIDER="openai"
EMBEDDING_API_KEY="<<>>"
EMBEDDING_MODEL="deepinfra/BAAI/bge-base-en-v1.5"
EMBEDDING_MODEL="openai/text-embedding-3-large"
EMBEDDING_ENDPOINT=""
EMBEDDING_API_VERSION=""
EMBEDDING_DIMENSIONS=3072
EMBEDDING_MAX_TOKENS=8191
# If an embedding key is not provided, the key set for LLM_API_KEY will be used
#EMBEDDING_API_KEY="your_api_key"
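For orientation, here is a minimal sketch of how an application could read the LLM and embedding variables above. It assumes the `python-dotenv` package and uses illustrative defaults; it is not Cognee's actual configuration loader.

```python
# Minimal sketch: load the template's LLM/embedding settings with python-dotenv.
# Assumes `pip install python-dotenv`; defaults below are illustrative only.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

llm_config = {
    "provider": os.getenv("LLM_PROVIDER", "openai"),
    "model": os.getenv("LLM_MODEL", "openai/gpt-4o-mini"),
    "api_key": os.getenv("LLM_API_KEY"),
    "endpoint": os.getenv("LLM_ENDPOINT") or None,
    "max_tokens": int(os.getenv("LLM_MAX_TOKENS", "16384")),
}

embedding_config = {
    "provider": os.getenv("EMBEDDING_PROVIDER", llm_config["provider"]),
    "model": os.getenv("EMBEDDING_MODEL", "openai/text-embedding-3-large"),
    # Per the comment above: fall back to the LLM key when no embedding key is set.
    "api_key": os.getenv("EMBEDDING_API_KEY", llm_config["api_key"]),
    "dimensions": int(os.getenv("EMBEDDING_DIMENSIONS", "3072")),
}

if llm_config["api_key"] is None:
    raise RuntimeError("LLM_API_KEY must be set (see the NOTE at the top of .env.template).")
```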
###
### DB
###

###
### db minimal/default
###

GRAPH_DATABASE_PROVIDER="networkx"
VECTOR_DB_PROVIDER="lancedb"
DB_PROVIDER=sqlite
DB_NAME=cognee_db

###
### Relational options
###
################################################################################
# 🗄️ Relational database settings
################################################################################

DB_PROVIDER="sqlite"
DB_NAME=cognee_db

DB_PROVIDER=postgres
DB_NAME=cognee_db
DB_HOST=127.0.0.1
DB_PORT=5432
DB_USERNAME=cognee
DB_PASSWORD=cognee
# -- To switch to Postgres / PGVector, uncomment and fill these: -------------
#DB_PROVIDER=postgres
#DB_NAME=cognee_db
# To use Postgres with the Cognee backend in Docker Compose, use the following instead: DB_HOST=host.docker.internal
#DB_HOST=127.0.0.1
#DB_PORT=5432
#DB_USERNAME=cognee
#DB_PASSWORD=cognee

###
### Graph options
###

# Default
################################################################################
# 🕸️ Graph Database settings
################################################################################

# Default (local file-based)
GRAPH_DATABASE_PROVIDER="kuzu"

# or if using remote Kuzu
# -- To switch to remote Kuzu, uncomment and fill these: ------------------------------------------------------------
#GRAPH_DATABASE_PROVIDER="kuzu"
#GRAPH_DATABASE_PROVIDER="kuzu-remote"
#GRAPH_DATABASE_URL="http://localhost:8000"
#GRAPH_DATABASE_USERNAME=XXX
#GRAPH_DATABASE_PASSWORD=YYY

GRAPH_DATABASE_PROVIDER="kuzu"
GRAPH_DATABASE_PROVIDER="kuzu-remote"
GRAPH_DATABASE_URL="http://localhost:8000"
GRAPH_DATABASE_USERNAME=XXX
GRAPH_DATABASE_PASSWORD=YYY
# -- To switch to Neo4j, uncomment and fill these: ------------------------------------------------------------------
#GRAPH_DATABASE_PROVIDER="neo4j"
#GRAPH_DATABASE_URL=bolt://localhost:7687
#GRAPH_DATABASE_USERNAME=neo4j
#GRAPH_DATABASE_PASSWORD=localneo4j

# or if using Neo4j

GRAPH_DATABASE_PROVIDER="neo4j"
GRAPH_DATABASE_URL=bolt://localhost:7687
GRAPH_DATABASE_USERNAME=neo4j
GRAPH_DATABASE_PASSWORD=localneo4j

###
### Vector options
###
################################################################################
# 📐 Vector Database settings
################################################################################

# Supported providers: pgvector | qdrant | weaviate | milvus | lancedb | chromadb
VECTOR_DB_PROVIDER="lancedb"
# Only needed when a cloud vector database is used
VECTOR_DB_URL=
VECTOR_DB_KEY=

VECTOR_DB_PROVIDER="pgvector"
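As a quick illustration of how the relational settings above compose, here is a hedged sketch that assembles a SQLAlchemy-style connection URL from the `DB_*` variables. The driver names (`asyncpg`, `aiosqlite`) are assumptions for the example; Cognee's own engine setup may differ.

```python
# Hedged sketch: build a connection URL from the DB_* variables above.
import os

def build_database_url() -> str:
    provider = os.getenv("DB_PROVIDER", "sqlite")
    name = os.getenv("DB_NAME", "cognee_db")

    if provider == "postgres":
        user = os.getenv("DB_USERNAME", "cognee")
        password = os.getenv("DB_PASSWORD", "cognee")
        # In Docker Compose, DB_HOST is typically host.docker.internal (see comment above).
        host = os.getenv("DB_HOST", "127.0.0.1")
        port = os.getenv("DB_PORT", "5432")
        return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"

    # Default: a local SQLite file, no server required.
    return f"sqlite+aiosqlite:///{name}.db"

print(build_database_url())
```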
###
### for release test
###

LLM_API_KEY="..."

OPENAI_API_KEY="..."

MIGRATION_DB_PATH="~/Downloads/"
MIGRATION_DB_NAME="Chinook_Sqlite.sqlite"
MIGRATION_DB_PROVIDER="sqlite"

GRAPH_DATABASE_URL="bolt://54.246.89.112:7687"
GRAPH_DATABASE_USERNAME="neo4j"
GRAPH_DATABASE_PASSWORD="pleaseletmein"

###
### ROOT DIRECTORY IF USING COGNEE LIB INSIDE A DOCKER
###
################################################################################
# 📂 ROOT DIRECTORY IF USING COGNEE LIB INSIDE A DOCKER
################################################################################
# Set up the Cognee system directory. Cognee will store system files and databases here.

DATA_ROOT_DIRECTORY='/cognee_data/data'
SYSTEM_ROOT_DIRECTORY='/cognee_data/system'

################################################################################
# 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS
################################################################################

# Postgres-specific parameters (only if Postgres or PGVector is used). Do not use for Cognee's default SQLite-NetworkX-LanceDB setup.
# DB_USERNAME=cognee
# DB_PASSWORD=cognee
# To use Postgres with the Cognee backend in Docker Compose, use the following instead: DB_HOST=host.docker.internal
# DB_HOST=127.0.0.1
# DB_PORT=5432
MIGRATION_DB_PATH="/path/to/migration/directory"
MIGRATION_DB_NAME="migration_database.sqlite"
MIGRATION_DB_PROVIDER="sqlite"

# Params for migrating relational database data to graph / Cognee (PostgreSQL and SQLite supported)
# MIGRATION_DB_PATH="/path/to/migration/directory"
# MIGRATION_DB_NAME="migration_database.sqlite"
# MIGRATION_DB_PROVIDER="sqlite"
# Postgres-specific parameters for migration
# -- Postgres-specific migration params --------------------------------------
# MIGRATION_DB_USERNAME=cognee
# MIGRATION_DB_PASSWORD=cognee
# MIGRATION_DB_HOST="127.0.0.1"
# MIGRATION_DB_PORT=5432

# LITELLM logging level. Set to quiet down logging.
LITELLM_LOG="ERROR"
################################################################################
# 🔒 Security Settings
################################################################################

# Set this environment variable to disable sending telemetry data
# TELEMETRY_DISABLED=1

# When set to False, adding local system files to Cognee is not allowed.
# Should be set to False when Cognee is used as a backend.
ACCEPT_LOCAL_FILE_PATH=True

# When set to False, Cognee is not allowed to send HTTP requests.
# This protects against Server-Side Request Forgery when proper infrastructure is not in place.
ALLOW_HTTP_REQUESTS=True

# Set this variable to True to enforce usage of backend access control for Cognee.
# Note: This is currently only supported by the following databases:

@@ -194,3 +117,94 @@ LITELLM_LOG="ERROR"

# It enforces LanceDB and KuzuDB use and uses them to create databases per Cognee user + dataset.
ENABLE_BACKEND_ACCESS_CONTROL=False

################################################################################
# 🛠️ DEV Settings
################################################################################

ENV="local"

TOKENIZERS_PARALLELISM="false"

# LITELLM logging level. Set to quiet down logging.
LITELLM_LOG="ERROR"

# Set this environment variable to disable sending telemetry data
# TELEMETRY_DISABLED=1

# Default User Configuration
# DEFAULT_USER_EMAIL=""
# DEFAULT_USER_PASSWORD=""

------------------------------- END OF POSSIBLE SETTINGS -------------------------------
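Environment variables arrive as strings ("True"/"False"), so a deployment needs to turn the security flags above into booleans before acting on them. A minimal sketch of that parsing step; this is illustrative, not the parsing Cognee itself performs.

```python
# Illustrative only: read the security flags from the environment as booleans.
import os

def env_flag(name: str, default: bool) -> bool:
    """Interpret common truthy spellings of an environment variable."""
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}

ACCEPT_LOCAL_FILE_PATH = env_flag("ACCEPT_LOCAL_FILE_PATH", True)
ALLOW_HTTP_REQUESTS = env_flag("ALLOW_HTTP_REQUESTS", True)
ENABLE_BACKEND_ACCESS_CONTROL = env_flag("ENABLE_BACKEND_ACCESS_CONTROL", False)

if not ALLOW_HTTP_REQUESTS:
    print("Outbound HTTP ingestion should be refused (SSRF hardening).")
```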
###############################################################################
# 🧪 EXAMPLE OVERRIDES (commented out)
###############################################################################
# The blocks below show how to configure alternative providers.
# Uncomment + fill values to switch.

########## Azure OpenAI #######################################################
#LLM_MODEL="azure/gpt-4o-mini"
#LLM_ENDPOINT="https://DNS.azure.com/openai/deployments/gpt-4o-mini"
#LLM_API_KEY="<<TALK TO YOUR AZURE GUY"
#LLM_API_VERSION="2024-12-01-preview"

## llm api version might not be relevant
#LLM_MAX_TOKENS="16384"

#EMBEDDING_MODEL="azure/text-embedding-3-large"
#EMBEDDING_ENDPOINT="https://DNS.openai.azure.com/openai/deployments/text-embedding-3-large"
#EMBEDDING_API_KEY="<<TALK TO YOUR AZURE GUY>"
#EMBEDDING_API_VERSION="2024-12-01-preview"
#EMBEDDING_DIMENSIONS=3072
#EMBEDDING_MAX_TOKENS=8191

########## Local LLM via Ollama ###############################################

#LLM_API_KEY="ollama"
#LLM_MODEL="llama3.1:8b"
#LLM_PROVIDER="ollama"
#LLM_ENDPOINT="http://localhost:11434/v1"
#EMBEDDING_PROVIDER="ollama"
#EMBEDDING_MODEL="avr/sfr-embedding-mistral:latest"
#EMBEDDING_ENDPOINT="http://localhost:11434/api/embeddings"
#EMBEDDING_DIMENSIONS=4096
#HUGGINGFACE_TOKENIZER="Salesforce/SFR-Embedding-Mistral"

########## OpenRouter (also free) #############################################

#LLM_API_KEY="<<go-get-one-yourself"
#LLM_PROVIDER="custom"
#LLM_MODEL="openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
#LLM_ENDPOINT="https://openrouter.ai/api/v1"

########## DeepInfra ##########################################################

#LLM_API_KEY="<<>>"
#LLM_PROVIDER="custom"
#LLM_MODEL="deepinfra/meta-llama/Meta-Llama-3-8B-Instruct"
#LLM_ENDPOINT="https://api.deepinfra.com/v1/openai"

#EMBEDDING_PROVIDER="openai"
#EMBEDDING_API_KEY="<<>>"
#EMBEDDING_MODEL="deepinfra/BAAI/bge-base-en-v1.5"
#EMBEDDING_ENDPOINT=""
#EMBEDDING_API_VERSION=""
#EMBEDDING_DIMENSIONS=3072
#EMBEDDING_MAX_TOKENS=8191

########## Release Test #######################################################

#LLM_API_KEY="..."

#OPENAI_API_KEY="..."

#MIGRATION_DB_PATH="~/Downloads/"
#MIGRATION_DB_NAME="Chinook_Sqlite.sqlite"
#MIGRATION_DB_PROVIDER="sqlite"

#GRAPH_DATABASE_URL="bolt://54.246.89.112:7687"
#GRAPH_DATABASE_USERNAME="neo4j"
#GRAPH_DATABASE_PASSWORD="pleaseletmein"
@@ -31,6 +31,7 @@ def get_add_router() -> APIRouter:
            raise ValueError("Either datasetId or datasetName must be provided.")

        try:
            # TODO: Add check if HTTP requests are enabled before allowing requests and git clone
            if isinstance(data, str) and data.startswith("http"):
                if "github" in data:
                    # Perform git clone if the URL is from GitHub
@@ -5,13 +5,16 @@ from sqlalchemy import select
from sqlalchemy.sql import delete as sql_delete
from cognee.modules.data.models import Data, DatasetData, Dataset
from cognee.infrastructure.databases.graph import get_graph_engine
from io import StringIO, BytesIO
from io import BytesIO
import hashlib
import asyncio
from uuid import UUID
from cognee.modules.users.models import User
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.infrastructure.engine import DataPoint
from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
from cognee.modules.users.methods import get_default_user
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.context_global_variables import set_database_global_context_variables
from .exceptions import DocumentNotFoundError, DatasetNotFoundError, DocumentSubgraphNotFoundError
from cognee.shared.logging_utils import get_logger
@@ -26,7 +29,9 @@ def get_text_content_hash(text: str) -> str:
async def delete(
    data: Union[BinaryIO, List[BinaryIO], str, List[str]],
    dataset_name: str = "main_dataset",
    dataset_id: UUID = None,
    mode: str = "soft",
    user: User = None,
):
    """Delete a document and all its related nodes from both relational and graph databases.
@@ -34,15 +39,27 @@ async def delete(
        data: The data to delete (file, URL, or text)
        dataset_name: Name of the dataset to delete from
        mode: "soft" (default) or "hard" - hard mode also deletes degree-one entity nodes
        user: User performing the operation; if None, the default user will be used.
    """

    if user is None:
        user = await get_default_user()

    # Verify the user has permission to work with the given dataset. If dataset_id is given use it, otherwise use dataset_name.
    dataset = await get_authorized_existing_datasets(
        [dataset_id] if dataset_id else [dataset_name], "delete", user
    )

    # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
    await set_database_global_context_variables(dataset[0].id, dataset[0].owner_id)

    # Handle different input types
    if isinstance(data, str):
        if data.startswith("file://"):  # It's a file path
            with open(data.replace("file://", ""), mode="rb") as file:
                classified_data = classify(file)
                content_hash = classified_data.get_metadata()["content_hash"]
                return await delete_single_document(content_hash, dataset_name, mode)
                return await delete_single_document(content_hash, dataset[0].id, mode)
        elif data.startswith("http"):  # It's a URL
            import requests
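The branches above resolve every input to a content hash: raw text goes through get_text_content_hash, while files and URLs go through classify(...).get_metadata()["content_hash"]. For context, a plausible sketch of a text-hash helper; the repository's actual algorithm is not shown in this diff and may differ.

```python
import hashlib

def get_text_content_hash(text: str) -> str:
    # Illustrative sketch: hash the UTF-8 bytes of the text so identical content
    # resolves to the same identifier. The real helper may use a different algorithm.
    return hashlib.sha256(text.encode("utf-8")).hexdigest()
```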
@@ -51,26 +68,26 @@ async def delete(
            file_data = BytesIO(response.content)
            classified_data = classify(file_data)
            content_hash = classified_data.get_metadata()["content_hash"]
            return await delete_single_document(content_hash, dataset_name, mode)
            return await delete_single_document(content_hash, dataset[0].id, mode)
        else:  # It's a text string
            content_hash = get_text_content_hash(data)
            classified_data = classify(data)
            return await delete_single_document(content_hash, dataset_name, mode)
            return await delete_single_document(content_hash, dataset[0].id, mode)
    elif isinstance(data, list):
        # Handle list of inputs sequentially
        results = []
        for item in data:
            result = await delete(item, dataset_name, mode)
            result = await delete(item, dataset_name, dataset[0].id, mode)
            results.append(result)
        return {"status": "success", "message": "Multiple documents deleted", "results": results}
    else:  # It's already a BinaryIO
        data.seek(0)  # Ensure we're at the start of the file
        classified_data = classify(data)
        content_hash = classified_data.get_metadata()["content_hash"]
        return await delete_single_document(content_hash, dataset_name, mode)
        return await delete_single_document(content_hash, dataset[0].id, mode)


async def delete_single_document(content_hash: str, dataset_name: str, mode: str = "soft"):
async def delete_single_document(content_hash: str, dataset_id: UUID = None, mode: str = "soft"):
    """Delete a single document by its content hash."""

    # Delete from graph database
@@ -157,11 +174,11 @@ async def delete_single_document(content_hash: str, dataset_name: str, mode: str

        # Get the dataset
        dataset = (
            await session.execute(select(Dataset).filter(Dataset.name == dataset_name))
            await session.execute(select(Dataset).filter(Dataset.id == dataset_id))
        ).scalar_one_or_none()

        if dataset is None:
            raise DatasetNotFoundError(f"Dataset not found: {dataset_name}")
            raise DatasetNotFoundError(f"Dataset not found: {dataset_id}")

        # Delete from dataset_data table
        dataset_delete_stmt = sql_delete(DatasetData).where(
@@ -186,7 +203,7 @@ async def delete_single_document(content_hash: str, dataset_name: str, mode: str
            "message": "Document deleted from both graph and relational databases",
            "graph_deletions": deletion_result["deleted_counts"],
            "content_hash": content_hash,
            "dataset": dataset_name,
            "dataset": dataset_id,
            "deleted_node_ids": [
                str(node_id) for node_id in deleted_node_ids
            ],  # Convert back to strings for response
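Taken together, the delete() changes resolve the target dataset through get_authorized_existing_datasets and then operate on dataset[0].id rather than on a dataset name. A hedged usage sketch follows; the import path and the UUID value are assumptions for illustration, not confirmed by this diff.

```python
import asyncio
from uuid import UUID

# Assumed import path for illustration; adjust to wherever delete() is exported.
from cognee.api.v1.delete import delete

async def main():
    # dataset_id takes precedence over dataset_name when both are supplied.
    result = await delete(
        "file:///tmp/example.txt",  # file path, URL, raw text, or a file object
        dataset_name="main_dataset",
        dataset_id=UUID("11111111-1111-1111-1111-111111111111"),  # illustrative UUID
        mode="soft",   # "hard" also removes degree-one entity nodes
        user=None,     # None -> the default user is resolved inside delete()
    )
    print(result)

asyncio.run(main())
```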
@@ -1,7 +1,8 @@
from fastapi import Form, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
from typing import List, Optional
from typing import List
from uuid import UUID
import subprocess
from cognee.shared.logging_utils import get_logger
import requests
@@ -18,6 +19,7 @@ def get_delete_router() -> APIRouter:
    async def delete(
        data: List[UploadFile],
        dataset_name: str = Form("main_dataset"),
        dataset_id: UUID = None,
        mode: str = Form("soft"),
        user: User = Depends(get_authenticated_user),
    ):
@@ -35,6 +37,7 @@ def get_delete_router() -> APIRouter:
            # Handle each file in the list
            results = []
            for file in data:
                # TODO: Add check if HTTP requests are enabled before allowing requests and git clone
                if file.filename.startswith("http"):
                    if "github" in file.filename:
                        # For GitHub repos, we need to get the content hash of each file
@@ -54,12 +57,22 @@ def get_delete_router() -> APIRouter:
                        response.raise_for_status()
                        file_data = response.content
                        result = await cognee_delete(
                            file_data, dataset_name=dataset_name, mode=mode
                            file_data,
                            dataset_name=dataset_name,
                            dataset_id=dataset_id,
                            mode=mode,
                            user=user,
                        )
                        results.append(result)
                else:
                    # Handle uploaded file by accessing its file attribute
                    result = await cognee_delete(file.file, dataset_name=dataset_name, mode=mode)
                    result = await cognee_delete(
                        file.file,
                        dataset_name=dataset_name,
                        dataset_id=dataset_id,
                        mode=mode,
                        user=user,
                    )
                    results.append(result)

            if len(results) == 1:
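For completeness, a hedged sketch of calling this endpoint over HTTP. The URL, HTTP method, and the placement of dataset_id are all assumptions (in FastAPI, a scalar parameter without Form() alongside multipart fields is typically a query parameter); adjust to the actual route and authentication scheme.

```python
# Hedged client sketch only: the endpoint URL and method below are assumed, not taken from this diff.
import requests

API_URL = "http://localhost:8000/api/v1/delete"  # assumed mount point

with open("example.txt", "rb") as fh:
    response = requests.request(
        "DELETE",
        API_URL,
        files=[("data", ("example.txt", fh, "text/plain"))],      # UploadFile list
        data={"dataset_name": "main_dataset", "mode": "soft"},     # Form fields
        params={"dataset_id": "11111111-1111-1111-1111-111111111111"},  # assumed query param
        headers={"Authorization": "Bearer <token>"},               # get_authenticated_user
        timeout=30,
    )
response.raise_for_status()
print(response.json())
```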
@@ -176,43 +176,6 @@ class DataPoint(BaseModel):
        """
        return self.model_validate_json(json_str)

    # Pickle Serialization
    def to_pickle(self) -> bytes:
        """
        Serialize the DataPoint instance to a byte format for pickling.

        This method uses the built-in Python pickle module to convert the instance into a byte
        stream for persistence or transmission.

        Returns:
        --------

            - bytes: The pickled byte representation of the DataPoint instance.
        """
        return pickle.dumps(self.dict())

    @classmethod
    def from_pickle(self, pickled_data: bytes):
        """
        Deserialize a DataPoint instance from a pickled byte stream.

        The method converts the byte stream back into a DataPoint instance by loading the data
        and validating it through the model's constructor.

        Parameters:
        -----------

            - pickled_data (bytes): The bytes representation of a pickled DataPoint instance to
              be deserialized.

        Returns:
        --------

            A new DataPoint instance created from the pickled data.
        """
        data = pickle.loads(pickled_data)
        return self(**data)

    def to_dict(self, **kwargs) -> Dict[str, Any]:
        """
        Convert the DataPoint instance to a dictionary representation.
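Per the hunk header (-176,43 +176,6), this change appears to remove the pickle-based to_pickle/from_pickle methods while keeping the JSON path (model_validate_json is still called above), which fits the PR's security focus since unpickling untrusted bytes can execute arbitrary code. A short sketch of the pickle-free round-trip using standard pydantic v2 methods; the example model is illustrative, not Cognee's actual DataPoint fields.

```python
# Generic pydantic v2 JSON round-trip, shown as the pickle-free alternative.
from pydantic import BaseModel

class ExamplePoint(BaseModel):  # illustrative model, not the real DataPoint
    id: str
    payload: dict

point = ExamplePoint(id="doc-1", payload={"text": "hello"})

json_str = point.model_dump_json()                       # serialize to JSON text
restored = ExamplePoint.model_validate_json(json_str)    # mirrors the from_json path kept above
assert restored == point
```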
@@ -20,6 +20,7 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any], datase
        file_path = data_item
    # data is a file path
    elif data_item.startswith("file://") or data_item.startswith("/"):
        # TODO: Add check if ACCEPT_LOCAL_FILE_PATH is enabled, if it's not raise an error
        file_path = data_item.replace("file://", "")
    # data is text
    else:
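The TODO above is the file-path counterpart of the ALLOW_HTTP_REQUESTS flag covered earlier. A hedged sketch of the check it describes; the helper name and exception type are illustrative, not the actual Cognee implementation.

```python
# Illustrative sketch of the TODO above, not the real implementation.
import os

def ensure_local_file_path_allowed(data_item: str) -> None:
    accept_local = os.getenv("ACCEPT_LOCAL_FILE_PATH", "True").strip().lower() == "true"
    is_local_path = data_item.startswith("file://") or data_item.startswith("/")
    if is_local_path and not accept_local:
        raise PermissionError(
            "Local file paths are rejected because ACCEPT_LOCAL_FILE_PATH is set to False."
        )

# Usage before resolving file_path in save_data_item_to_storage:
# ensure_local_file_path_allowed(data_item)
```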