Compare commits
1 commit
main
...
pensar-aut
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d7ce31f7c2 |
1 changed files with 29 additions and 1 deletions
|
|
@ -7,6 +7,7 @@ from cognee.shared.logging_utils import get_logger
|
||||||
import requests
|
import requests
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
from cognee.modules.users.methods import get_authenticated_user
|
from cognee.modules.users.methods import get_authenticated_user
|
||||||
|
import re
|
||||||
|
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
@ -31,12 +32,39 @@ def get_delete_router() -> APIRouter:
|
||||||
"""
|
"""
|
||||||
from cognee.api.v1.delete import delete as cognee_delete
|
from cognee.api.v1.delete import delete as cognee_delete
|
||||||
|
|
||||||
|
def is_safe_github_url(url: str) -> bool:
|
||||||
|
# Only allow https/http github URLs ending in .git, no query or fragment, and not starting with '-'
|
||||||
|
pattern = r'^https:\/\/github\.com\/[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+\.git$'
|
||||||
|
pattern_http = r'^http:\/\/github\.com\/[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+\.git$'
|
||||||
|
# Disallow filenames starting with "-"
|
||||||
|
if url.startswith('-'):
|
||||||
|
return False
|
||||||
|
# Disallow obvious shell metacharacters
|
||||||
|
if any(char in url for char in [';', '|', '`', '&', '>', '<']):
|
||||||
|
return False
|
||||||
|
# Disallow whitespace
|
||||||
|
if re.search(r'\s', url):
|
||||||
|
return False
|
||||||
|
# Disallow URLs with query params or fragments
|
||||||
|
if '?' in url or '#' in url:
|
||||||
|
return False
|
||||||
|
# Only allow proper GitHub .git repo URLs (no subdirectories)
|
||||||
|
if re.match(pattern, url) or re.match(pattern_http, url):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Handle each file in the list
|
# Handle each file in the list
|
||||||
results = []
|
results = []
|
||||||
for file in data:
|
for file in data:
|
||||||
if file.filename.startswith("http"):
|
if file.filename.startswith("http"):
|
||||||
if "github" in file.filename:
|
if "github" in file.filename:
|
||||||
|
# Validate github repo URL before using in subprocess
|
||||||
|
if not is_safe_github_url(file.filename):
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=400,
|
||||||
|
content={"error": "Invalid or potentially dangerous GitHub repository URL provided."},
|
||||||
|
)
|
||||||
# For GitHub repos, we need to get the content hash of each file
|
# For GitHub repos, we need to get the content hash of each file
|
||||||
repo_name = file.filename.split("/")[-1].replace(".git", "")
|
repo_name = file.filename.split("/")[-1].replace(".git", "")
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue