feat: support suggested_questions_after_answer to be configured (#29254)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
wangxiaolei authored on 2025-12-08 10:27:02 +08:00, committed by GitHub
parent 88bfeee234
commit ee0fe8c7f9
5 changed files with 301 additions and 2 deletions

README.md

@@ -139,6 +139,19 @@ Star Dify on GitHub and be instantly notified of new releases.
If you need to customize the configuration, please refer to the comments in our [.env.example](docker/.env.example) file and update the corresponding values in your `.env` file. Additionally, you might need to make adjustments to the `docker-compose.yaml` file itself, such as changing image versions, port mappings, or volume mounts, based on your specific deployment environment and requirements. After making any changes, please re-run `docker-compose up -d`. You can find the full list of available environment variables [here](https://docs.dify.ai/getting-started/install-self-hosted/environments).
#### Customizing Suggested Questions
You can now customize the "Suggested Questions After Answer" feature to better fit your use case. For example, to generate longer, more technical questions:
```bash
# In your .env file
SUGGESTED_QUESTIONS_PROMPT='Please help me predict the five most likely technical follow-up questions a developer would ask. Focus on implementation details, best practices, and architecture considerations. Keep each question between 40-60 characters. Output must be JSON array: ["question1","question2","question3","question4","question5"]'
SUGGESTED_QUESTIONS_MAX_TOKENS=512
SUGGESTED_QUESTIONS_TEMPERATURE=0.3
```
See the [Suggested Questions Configuration Guide](docs/suggested-questions-configuration.md) for detailed examples and usage instructions.
### Metrics Monitoring with Grafana
Import the dashboard to Grafana, using Dify's PostgreSQL database as data source, to monitor metrics in granularity of apps, tenants, messages, and more.

api/.env.example

@@ -633,6 +633,22 @@ SWAGGER_UI_PATH=/swagger-ui.html
# Set to false to export dataset IDs as plain text for easier cross-environment import
DSL_EXPORT_ENCRYPT_DATASET_ID=true
# Suggested Questions After Answer Configuration
# These environment variables allow customization of the suggested questions feature
#
# Custom prompt for generating suggested questions (optional)
# If not set, uses the default prompt that generates 3 questions under 20 characters each
# Example: "Please help me predict the five most likely technical follow-up questions a developer would ask. Focus on implementation details, best practices, and architecture considerations. Keep each question between 40-60 characters. Output must be JSON array: [\"question1\",\"question2\",\"question3\",\"question4\",\"question5\"]"
# SUGGESTED_QUESTIONS_PROMPT=
# Maximum number of tokens for suggested questions generation (default: 256)
# Adjust this value for longer questions or more questions
# SUGGESTED_QUESTIONS_MAX_TOKENS=256
# Temperature for suggested questions generation (default: 0.0)
# Higher values (0.5-1.0) produce more creative questions; lower values (0.0-0.3) produce more focused questions
# SUGGESTED_QUESTIONS_TEMPERATURE=0
# Tenant isolated task queue configuration
TENANT_ISOLATED_TASK_CONCURRENCY=1

api/core/llm_generator/llm_generator.py

@@ -15,6 +15,8 @@ from core.llm_generator.prompts import (
LLM_MODIFY_CODE_SYSTEM,
LLM_MODIFY_PROMPT_SYSTEM,
PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE,
SUGGESTED_QUESTIONS_MAX_TOKENS,
SUGGESTED_QUESTIONS_TEMPERATURE,
SYSTEM_STRUCTURED_OUTPUT_GENERATE,
WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE,
)
@@ -124,7 +126,10 @@ class LLMGenerator:
try:
response: LLMResult = model_instance.invoke_llm(
prompt_messages=list(prompt_messages),
model_parameters={"max_tokens": 256, "temperature": 0},
model_parameters={
"max_tokens": SUGGESTED_QUESTIONS_MAX_TOKENS,
"temperature": SUGGESTED_QUESTIONS_TEMPERATURE,
},
stream=False,
)

api/core/llm_generator/prompts.py

@@ -1,4 +1,6 @@
# Written by YORKI MINAKO🤡, Edited by Xiaoyi, Edited by yasu-oh
import os
CONVERSATION_TITLE_PROMPT = """You are asked to generate a concise chat title by decomposing the users input into two parts: “Intention” and “Subject”.
1. Detect Input Language
@@ -94,7 +96,8 @@ JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE = (
)
-SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
+# Default prompt for suggested questions (can be overridden by environment variable)
+_DEFAULT_SUGGESTED_QUESTIONS_AFTER_ANSWER_PROMPT = (
"Please help me predict the three most likely questions that human would ask, "
"and keep each question under 20 characters.\n"
"MAKE SURE your output is the SAME language as the Assistant's latest response. "
@@ -102,6 +105,15 @@ SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
'["question1","question2","question3"]\n'
)
# Environment variable override for suggested questions prompt
SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = os.getenv(
"SUGGESTED_QUESTIONS_PROMPT", _DEFAULT_SUGGESTED_QUESTIONS_AFTER_ANSWER_PROMPT
)
# Configurable LLM parameters for suggested questions (can be overridden by environment variables)
SUGGESTED_QUESTIONS_MAX_TOKENS = int(os.getenv("SUGGESTED_QUESTIONS_MAX_TOKENS", "256"))
SUGGESTED_QUESTIONS_TEMPERATURE = float(os.getenv("SUGGESTED_QUESTIONS_TEMPERATURE", "0"))
GENERATOR_QA_PROMPT = (
"<Task> The user will send a long text. Generate a Question and Answer pairs only using the knowledge"
" in the long text. Please think step by step."

docs/suggested-questions-configuration.md

@@ -0,0 +1,253 @@
# Configurable Suggested Questions After Answer
This document explains how to configure the "Suggested Questions After Answer" feature in Dify using environment variables.
## Overview
The suggested questions feature generates follow-up questions after each AI response to help users continue the conversation. By default, Dify generates 3 short questions (under 20 characters each), but you can customize this behavior to better fit your specific use case.
## Environment Variables
### `SUGGESTED_QUESTIONS_PROMPT`
**Description**: Custom prompt template for generating suggested questions.
**Default**:
```
Please help me predict the three most likely questions that human would ask, and keep each question under 20 characters.
MAKE SURE your output is the SAME language as the Assistant's latest response.
The output must be an array in JSON format following the specified schema:
["question1","question2","question3"]
```
**Usage Examples**:
1. **Technical/Developer Questions**:
```bash
export SUGGESTED_QUESTIONS_PROMPT='Please help me predict the five most likely technical follow-up questions a developer would ask. Focus on implementation details, best practices, and architecture considerations. Keep each question between 40-60 characters. Output must be JSON array: ["question1","question2","question3","question4","question5"]'
```
2. **Customer Support**:
```bash
export SUGGESTED_QUESTIONS_PROMPT='Generate 3 helpful follow-up questions that guide customers toward solving their own problems. Focus on troubleshooting steps and common issues. Keep questions under 30 characters. JSON format: ["q1","q2","q3"]'
```
3. **Educational Content**:
```bash
export SUGGESTED_QUESTIONS_PROMPT='Create 4 thought-provoking questions that help students understand the topic more deeply. Focus on concepts, relationships, and applications. Questions should be 25-40 characters. JSON: ["question1","question2","question3","question4"]'
```
4. **Multilingual Support**:
```bash
export SUGGESTED_QUESTIONS_PROMPT='Generate exactly 3 follow-up questions in the same language as the conversation. Adapt question length appropriately for the language (Chinese: 10-15 chars, English: 20-30 chars, Arabic: 25-35 chars). Always output valid JSON array.'
```
**Important Notes**:
- The prompt must request JSON array output format (a validation sketch follows this list)
- Include language matching instructions for multilingual support
- Specify clear character limits or question count requirements
- Focus on your specific domain or use case
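To sanity-check a custom prompt against these notes before deploying it, you can run sample LLM outputs through a small validator. The helper below is illustrative only (not part of Dify); adjust `expected_count` and `max_len` to match your prompt:
```python
import json

def validate_suggestions(raw: str, expected_count: int = 3, max_len: int = 20) -> list[str]:
    """Check that an LLM response satisfies the suggested-questions contract."""
    questions = json.loads(raw)  # raises json.JSONDecodeError on malformed output
    if not isinstance(questions, list) or not all(isinstance(q, str) for q in questions):
        raise ValueError("output must be a JSON array of strings")
    if len(questions) != expected_count:
        raise ValueError(f"expected {expected_count} questions, got {len(questions)}")
    too_long = [q for q in questions if len(q) > max_len]
    if too_long:
        raise ValueError(f"questions over {max_len} characters: {too_long}")
    return questions

# The default contract: 3 questions, each under 20 characters.
print(validate_suggestions('["How does it work?","What is Dify?","Show an example?"]'))
```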
### `SUGGESTED_QUESTIONS_MAX_TOKENS`
**Description**: Maximum number of tokens for the LLM response.
**Default**: `256`
**Usage**:
```bash
export SUGGESTED_QUESTIONS_MAX_TOKENS=512 # For longer questions or more questions
```
**Recommended Values** (a rough sizing estimate follows this list):
- `256`: Default, good for 3-4 short questions
- `384`: Medium, good for 4-5 medium-length questions
- `512`: High, good for 5+ longer questions or complex prompts
- `1024`: Maximum, for very complex question generation
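These recommendations are deliberately generous. As a rough back-of-envelope estimate (assuming roughly four characters per token for English text), even five 60-character questions need only on the order of 120 tokens, so the extra headroom mainly absorbs formatting overhead and model verbosity:
```python
# Rough sizing sketch; the 4-characters-per-token ratio is an approximation for English.
def estimate_max_tokens(question_count: int, max_chars_per_question: int, margin: float = 1.5) -> int:
    json_overhead = 2 + question_count * 3  # brackets, quotes, commas (approximate)
    total_chars = question_count * max_chars_per_question + json_overhead
    return int((total_chars / 4) * margin)

print(estimate_max_tokens(3, 20))  # default contract: ~26 tokens
print(estimate_max_tokens(5, 60))  # five long questions: ~118 tokens
```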
### `SUGGESTED_QUESTIONS_TEMPERATURE`
**Description**: Temperature parameter for LLM creativity.
**Default**: `0.0`
**Usage**:
```bash
export SUGGESTED_QUESTIONS_TEMPERATURE=0.3 # Balanced creativity
```
**Recommended Values**:
- `0.0-0.2`: Very focused, predictable questions (good for technical support)
- `0.3-0.5`: Balanced creativity and relevance (good for general use)
- `0.6-0.8`: More creative, diverse questions (good for brainstorming)
- `0.9-1.0`: Maximum creativity (good for educational exploration)
## Configuration Examples
### Example 1: Developer Documentation Chatbot
```bash
# .env file
SUGGESTED_QUESTIONS_PROMPT='Generate exactly 5 technical follow-up questions that developers would ask after reading code documentation. Focus on implementation details, edge cases, performance considerations, and best practices. Each question should be 40-60 characters long. Output as JSON array: ["question1","question2","question3","question4","question5"]'
SUGGESTED_QUESTIONS_MAX_TOKENS=512
SUGGESTED_QUESTIONS_TEMPERATURE=0.3
```
### Example 2: Customer Service Bot
```bash
# .env file
SUGGESTED_QUESTIONS_PROMPT='Create 3 actionable follow-up questions that help customers resolve their own issues. Focus on common problems, troubleshooting steps, and product features. Keep questions simple and under 25 characters. JSON: ["q1","q2","q3"]'
SUGGESTED_QUESTIONS_MAX_TOKENS=256
SUGGESTED_QUESTIONS_TEMPERATURE=0.1
```
### Example 3: Educational Tutor
```bash
# .env file
SUGGESTED_QUESTIONS_PROMPT='Generate 4 thought-provoking questions that help students deepen their understanding of the topic. Focus on relationships between concepts, practical applications, and critical thinking. Questions should be 30-45 characters. Output: ["question1","question2","question3","question4"]'
SUGGESTED_QUESTIONS_MAX_TOKENS=384
SUGGESTED_QUESTIONS_TEMPERATURE=0.6
```
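Before restarting the API, it can help to confirm that the process will actually see these values. One quick check uses the `python-dotenv` package (an extra assumption here; install it with `pip install python-dotenv` if needed):
```python
# Prints the effective values from a .env file in the current directory.
import os
from dotenv import load_dotenv

load_dotenv(".env")
print("prompt:", os.getenv("SUGGESTED_QUESTIONS_PROMPT", "<built-in default>"))
print("max_tokens:", os.getenv("SUGGESTED_QUESTIONS_MAX_TOKENS", "256"))
print("temperature:", os.getenv("SUGGESTED_QUESTIONS_TEMPERATURE", "0"))
```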
## Implementation Details
### How It Works
1. **Environment Variable Loading**: The system reads the environment variables once at startup (see the condensed sketch after this list)
2. **Fallback to Defaults**: If no environment variables are set, the original behavior is preserved
3. **Prompt Template**: The custom prompt is used as-is, allowing full control over question generation
4. **LLM Parameters**: The custom max_tokens and temperature values are passed to the LLM API
5. **JSON Parsing**: The system expects JSON array output and parses it accordingly
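Condensed into a standalone sketch, the override mechanism is the plain `os.getenv` fallback pattern from `prompts.py` (shown in the diff above):
```python
import os

_DEFAULT_PROMPT = (
    "Please help me predict the three most likely questions that human would ask, "
    "and keep each question under 20 characters.\n"
)

# Environment variables win; otherwise the shipped defaults apply unchanged.
SUGGESTED_QUESTIONS_PROMPT = os.getenv("SUGGESTED_QUESTIONS_PROMPT", _DEFAULT_PROMPT)
SUGGESTED_QUESTIONS_MAX_TOKENS = int(os.getenv("SUGGESTED_QUESTIONS_MAX_TOKENS", "256"))
SUGGESTED_QUESTIONS_TEMPERATURE = float(os.getenv("SUGGESTED_QUESTIONS_TEMPERATURE", "0"))

# Note: the values are read once at module import, so the API process must be
# restarted for changes to take effect.
```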
### File Changes
The implementation modifies these files:
- `api/core/llm_generator/prompts.py`: Environment variable support
- `api/core/llm_generator/llm_generator.py`: Custom LLM parameters
- `api/.env.example`: Documentation of new variables
### Backward Compatibility
- ✅ **Zero Breaking Changes**: Works exactly as before if no environment variables are set
- ✅ **Default Behavior Preserved**: Original prompt and parameters used as fallbacks
- ✅ **No Database Changes**: Pure environment variable configuration
- ✅ **No UI Changes Required**: Configuration happens at deployment level
## Testing Your Configuration
### Local Testing
1. Set environment variables:
```bash
export SUGGESTED_QUESTIONS_PROMPT='Your test prompt...'
export SUGGESTED_QUESTIONS_MAX_TOKENS=300
export SUGGESTED_QUESTIONS_TEMPERATURE=0.4
```
2. Start Dify API:
```bash
cd api
python -m flask run --host 0.0.0.0 --port=5001 --debug
```
3. Test the feature in your chat application and verify the questions match your expectations (a scripted check follows).
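You can also script the check. The sketch below assumes the standard service-API route `GET /v1/messages/{message_id}/suggested`; the base URL, app key, message ID, and user value are placeholders to replace with your own:
```python
import requests

API_BASE = "http://localhost:5001/v1"
APP_API_KEY = "app-xxxxxxxx"   # placeholder: your app's API key
MESSAGE_ID = "<message-id>"    # placeholder: a message ID from a recent chat

resp = requests.get(
    f"{API_BASE}/messages/{MESSAGE_ID}/suggested",
    headers={"Authorization": f"Bearer {APP_API_KEY}"},
    params={"user": "test-user"},
    timeout=30,
)
resp.raise_for_status()
print(resp.json())  # expect something like {"result": "success", "data": ["...", "..."]}
```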
### Monitoring
Monitor the following when testing:
- **Question Quality**: Are questions relevant and helpful?
- **Language Matching**: Do questions match the conversation language?
- **JSON Format**: Is output properly formatted as JSON array?
- **Length Constraints**: Do questions follow your length requirements?
- **Response Time**: Are the custom parameters affecting performance?
## Troubleshooting
### Common Issues
1. **Invalid JSON Output**:
   - **Problem**: LLM doesn't return valid JSON
   - **Solution**: Make sure your prompt explicitly requests a JSON array format (a defensive parsing sketch follows this list)
2. **Questions Too Long/Short**:
   - **Problem**: Questions don't follow length constraints
   - **Solution**: Be more specific about character limits in your prompt
3. **Too Few/Many Questions**:
   - **Problem**: Wrong number of questions generated
   - **Solution**: Clearly specify the exact number in your prompt
4. **Language Mismatch**:
   - **Problem**: Questions in the wrong language
   - **Solution**: Include explicit language-matching instructions in the prompt
5. **Performance Issues**:
   - **Problem**: Slow response times
   - **Solution**: Reduce `SUGGESTED_QUESTIONS_MAX_TOKENS` or simplify the prompt
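For the invalid-JSON case specifically, a defensive parser that falls back to extracting the first `[...]` span from the response can help while you tune the prompt. This is a hypothetical helper for your own tooling, not Dify's built-in parsing:
```python
import json
import re

def extract_question_array(raw: str) -> list[str]:
    """Best-effort extraction of a JSON string array from an LLM response."""
    try:
        data = json.loads(raw)  # happy path: the whole response is the array
    except json.JSONDecodeError:
        match = re.search(r"\[.*?\]", raw, re.DOTALL)  # first bracketed span
        if match is None:
            return []
        try:
            data = json.loads(match.group(0))
        except json.JSONDecodeError:
            return []
    return [q for q in data if isinstance(q, str)] if isinstance(data, list) else []

print(extract_question_array('Sure! Here you go: ["q1","q2","q3"]'))  # ['q1', 'q2', 'q3']
```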
### Debug Logging
To debug your configuration, you can temporarily add logging to see the actual prompt and parameters being used:
```python
import logging

logger = logging.getLogger(__name__)

# In llm_generator.py, just before invoke_llm() is called
logger.info("Suggested questions prompt: %s", SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT)
logger.info("Max tokens: %s", SUGGESTED_QUESTIONS_MAX_TOKENS)
logger.info("Temperature: %s", SUGGESTED_QUESTIONS_TEMPERATURE)
```
## Migration Guide
### From Default Configuration
If you're currently using the default configuration and want to customize:
1. **Assess Your Needs**: Determine what aspects need customization (question count, length, domain focus)
2. **Design Your Prompt**: Write a custom prompt that addresses your specific use case
3. **Choose Parameters**: Select appropriate max_tokens and temperature values
4. **Test Incrementally**: Start with small changes and test thoroughly
5. **Deploy Gradually**: Roll out to production after successful testing
### Best Practices
1. **Start Simple**: Begin with minimal changes to the default prompt
2. **Test Thoroughly**: Test with various conversation types and languages
3. **Monitor Performance**: Watch for impact on response times and costs
4. **Get User Feedback**: Collect feedback on question quality and relevance
5. **Iterate**: Refine your configuration based on real-world usage
## Future Enhancements
This environment variable approach provides immediate customization while maintaining backward compatibility. Future enhancements could include:
1. **App-Level Configuration**: Different apps with different suggested question settings
2. **Dynamic Prompts**: Context-aware prompts based on conversation content
3. **Multi-Model Support**: Different models for different types of questions
4. **Analytics Dashboard**: Insights into question effectiveness and usage patterns
5. **A/B Testing**: Built-in testing of different prompt configurations
For now, the environment variable approach offers a simple, reliable way to customize the suggested questions feature for your specific needs.