From b9dd2bd9165e7859389c42002cd4827232c67dee Mon Sep 17 00:00:00 2001
From: "pensarapp[bot]" <182705637+pensarapp[bot]@users.noreply.github.com>
Date: Mon, 9 Jun 2025 16:50:10 +0000
Subject: [PATCH] Fix security issue: Unrestricted Prompt Path File Access Vulnerability (CWE-22, CWE-184, ML21)

---
Review notes (ignored by `git am`):
- validate_prompt_path() now rejects any value that is not a bare file name
  and returns the name unchanged. Returning an absolute path under a
  `prompts` directory next to completion.py bypassed the base directory used
  by read_query_prompt()/render_prompt().
- The one-entry allow-list was dropped: it made generate_completion() reject
  every prompt except summarize_search_results.txt.
- The two blank lines between top-level definitions and the trailing newline
  at end of file are preserved.
- The post-image blob id on the `index` line below is stale; regenerate the
  patch with `git format-patch` after applying.

 cognee/modules/retrieval/utils/completion.py | 42 ++++++++++++++++++--
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/cognee/modules/retrieval/utils/completion.py b/cognee/modules/retrieval/utils/completion.py
index ed4bdcefc..2c397680b 100644
--- a/cognee/modules/retrieval/utils/completion.py
+++ b/cognee/modules/retrieval/utils/completion.py
@@ -1,6 +1,34 @@
+import os
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
 
 
+def validate_prompt_path(prompt_path: str) -> str:
+    """Return ``prompt_path`` unchanged if it is a bare prompt file name.
+
+    Only a plain file name is accepted: anything carrying a directory
+    component (a path separator, a drive prefix, ``.`` or ``..``) is rejected
+    instead of being silently reduced to its basename. The name is handed back
+    as-is, not as an absolute path, so that ``read_query_prompt`` and
+    ``render_prompt`` keep resolving it against their own prompts directory.
+
+    Raises:
+        ValueError: If ``prompt_path`` is not a non-empty string or is not a
+            bare file name.
+    """
+    if not isinstance(prompt_path, str) or not prompt_path:
+        raise ValueError("Prompt path must be a non-empty string.")
+
+    # Test both separator styles so the check does not depend on the host OS.
+    has_separator = "/" in prompt_path or "\\" in prompt_path
+    is_bare_name = os.path.basename(prompt_path) == prompt_path
+    if has_separator or "\x00" in prompt_path or not is_bare_name:
+        raise ValueError(f"Invalid prompt filename: {prompt_path!r}")
+    if prompt_path in (os.curdir, os.pardir):
+        raise ValueError(f"Invalid prompt filename: {prompt_path!r}")
+
+    return prompt_path
+
+
 async def generate_completion(
     query: str,
@@ -10,8 +38,13 @@ async def generate_completion(
 ) -> str:
     """Generates a completion using LLM with given context and prompts."""
     args = {"question": query, "context": context}
-    user_prompt = render_prompt(user_prompt_path, args)
-    system_prompt = read_query_prompt(system_prompt_path)
+
+    # Reject prompt names with a directory component before loading them.
+    user_prompt_file = validate_prompt_path(user_prompt_path)
+    system_prompt_file = validate_prompt_path(system_prompt_path)
+
+    user_prompt = render_prompt(user_prompt_file, args)
+    system_prompt = read_query_prompt(system_prompt_file)
 
     llm_client = get_llm_client()
     return await llm_client.acreate_structured_output(
@@ -26,7 +59,10 @@ async def summarize_text(
     prompt_path: str = "summarize_search_results.txt",
 ) -> str:
     """Summarizes text using LLM with the specified prompt."""
-    system_prompt = read_query_prompt(prompt_path)
+    # Reject prompt names with a directory component before loading them.
+    prompt_file = validate_prompt_path(prompt_path)
+
+    system_prompt = read_query_prompt(prompt_file)
 
     llm_client = get_llm_client()
     return await llm_client.acreate_structured_output(