fix: prevent Path Traversal vulnerability in upload endpoint
- Add sanitize_filename() function to validate and clean uploaded filenames
- Remove path separators, traversal sequences, and control characters
- Verify final paths stay within the input directory using Path.resolve()
- Return HTTP 400 errors for unsafe filenames
- Prevent directory traversal attacks such as ../../../etc/passwd
This commit is contained in:
parent
44daf51501
commit
60777d535b
1 changed file with 50 additions and 4 deletions
|
|
@ -62,6 +62,49 @@ router = APIRouter(
|
||||||
temp_prefix = "__tmp__"
|
temp_prefix = "__tmp__"
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_filename(filename: str, input_dir: Path) -> str:
    """
    Sanitize an uploaded filename to prevent path traversal attacks.

    The name is cleaned in this order: control characters are dropped, path
    separators ("/" and "\\") are removed, ".." sequences are removed, and
    leading/trailing whitespace and dots are trimmed. The cleaned name is then
    joined to ``input_dir`` and resolved to confirm it stays inside that
    directory.

    Args:
        filename: The original filename from the upload
        input_dir: The target input directory

    Returns:
        str: Sanitized filename that is safe to use

    Raises:
        HTTPException: If the filename is unsafe or invalid (status 400)
    """
    # Basic validation; a falsy value such as None is rejected here as well.
    if not filename or not filename.strip():
        raise HTTPException(status_code=400, detail="Filename cannot be empty")

    # Drop control characters and null bytes FIRST. If this ran after the
    # ".." filter, an input like "a.\x00.b" would pass that filter untouched
    # and only become "a..b" once the control character was removed.
    clean_name = "".join(c for c in filename if ord(c) >= 32 and c != "\x7f")

    # Remove path separators and traversal sequences
    clean_name = clean_name.replace("/", "").replace("\\", "")
    clean_name = clean_name.replace("..", "")

    # Trim leading/trailing whitespace and dots until nothing changes. One
    # pass is not enough: stripping a dot can expose whitespace behind it
    # (". name" -> " name") and vice versa.
    while True:
        trimmed = clean_name.strip().strip(".")
        if trimmed == clean_name:
            break
        clean_name = trimmed

    # Check if anything is left after sanitization
    if not clean_name:
        raise HTTPException(
            status_code=400, detail="Invalid filename after sanitization"
        )

    # Resolve both paths; only these calls can raise filesystem errors, so
    # the try body is limited to them.
    try:
        base_dir = input_dir.resolve()
        final_path = (input_dir / clean_name).resolve()
    except (OSError, ValueError, RuntimeError) as err:
        # RuntimeError: Path.resolve() reports symlink loops this way on
        # Python versions before 3.13.
        raise HTTPException(status_code=400, detail="Invalid filename") from err

    # Verify the final path stays within the input directory
    if not final_path.is_relative_to(base_dir):
        raise HTTPException(status_code=400, detail="Unsafe filename detected")

    return clean_name
|
||||||
|
|
||||||
|
|
||||||
class ScanResponse(BaseModel):
|
class ScanResponse(BaseModel):
|
||||||
"""Response model for document scanning operation
|
"""Response model for document scanning operation
|
||||||
|
|
||||||
|
|
@ -986,18 +1029,21 @@ def create_document_routes(
|
||||||
HTTPException: If the file type is not supported (400) or other errors occur (500).
|
HTTPException: If the file type is not supported (400) or other errors occur (500).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if not doc_manager.is_supported_file(file.filename):
|
# Sanitize filename to prevent Path Traversal attacks
|
||||||
|
safe_filename = sanitize_filename(file.filename, doc_manager.input_dir)
|
||||||
|
|
||||||
|
if not doc_manager.is_supported_file(safe_filename):
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
detail=f"Unsupported file type. Supported types: {doc_manager.supported_extensions}",
|
detail=f"Unsupported file type. Supported types: {doc_manager.supported_extensions}",
|
||||||
)
|
)
|
||||||
|
|
||||||
file_path = doc_manager.input_dir / file.filename
|
file_path = doc_manager.input_dir / safe_filename
|
||||||
# Check if file already exists
|
# Check if file already exists
|
||||||
if file_path.exists():
|
if file_path.exists():
|
||||||
return InsertResponse(
|
return InsertResponse(
|
||||||
status="duplicated",
|
status="duplicated",
|
||||||
message=f"File '{file.filename}' already exists in the input directory.",
|
message=f"File '{safe_filename}' already exists in the input directory.",
|
||||||
)
|
)
|
||||||
|
|
||||||
with open(file_path, "wb") as buffer:
|
with open(file_path, "wb") as buffer:
|
||||||
|
|
@ -1008,7 +1054,7 @@ def create_document_routes(
|
||||||
|
|
||||||
return InsertResponse(
|
return InsertResponse(
|
||||||
status="success",
|
status="success",
|
||||||
message=f"File '{file.filename}' uploaded successfully. Processing will continue in background.",
|
message=f"File '{safe_filename}' uploaded successfully. Processing will continue in background.",
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error /documents/upload: {file.filename}: {str(e)}")
|
logger.error(f"Error /documents/upload: {file.filename}: {str(e)}")
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue