From 7d9d31b6f3fcefc0844e5d85e3f6db061174b593 Mon Sep 17 00:00:00 2001 From: daohp Date: Wed, 3 Dec 2025 14:24:10 +0700 Subject: [PATCH] feat: Add session history feature to LightRAG API - Introduced a new session history feature that tracks and manages conversation history across multiple chat sessions. - Implemented REST API endpoints for creating, listing, retrieving, and deleting chat sessions. - Enhanced error handling and logging for session management operations. - Updated README.md to include documentation for the new session history feature and its usage. --- README.md | 74 +++++++ SESSION_ALWAYS_ON.md | 272 +++++++++++++++++++++++++ SESSION_CONFIG_SIMPLIFIED.md | 202 ++++++++++++++++++ SESSION_FINAL_SIMPLIFICATION.md | 169 +++++++++++++++ SESSION_INTEGRATION_SUMMARY.md | 259 +++++++++++++++++++++++ docker-compose.yml | 2 +- docs/SessionHistoryMigration.md | 193 ++++++++++++++++++ lightrag/api/lightrag_server.py | 12 +- lightrag/api/routers/history_routes.py | 190 ++++++++++++----- lightrag/api/session_database.py | 163 +++++++++++++++ lightrag/api/session_manager.py | 226 ++++++++++++++++++++ lightrag/api/session_models.py | 65 ++++++ lightrag/api/session_schemas.py | 65 ++++++ scripts/migrate_session_history.sh | 133 ++++++++++++ 14 files changed, 1969 insertions(+), 56 deletions(-) create mode 100644 SESSION_ALWAYS_ON.md create mode 100644 SESSION_CONFIG_SIMPLIFIED.md create mode 100644 SESSION_FINAL_SIMPLIFICATION.md create mode 100644 SESSION_INTEGRATION_SUMMARY.md create mode 100644 docs/SessionHistoryMigration.md create mode 100644 lightrag/api/session_database.py create mode 100644 lightrag/api/session_manager.py create mode 100644 lightrag/api/session_models.py create mode 100644 lightrag/api/session_schemas.py create mode 100644 scripts/migrate_session_history.sh diff --git a/README.md b/README.md index 3147e23c..05bce2c0 100644 --- a/README.md +++ b/README.md @@ -1552,6 +1552,80 @@ When switching between different embedding models, you must 
clear the data direc The LightRAG Server is designed to provide Web UI and API support. **For more information about LightRAG Server, please refer to [LightRAG Server](./lightrag/api/README.md).** +## Session History Feature + +LightRAG includes a built-in session history feature that automatically tracks and manages conversation history across multiple chat sessions. This feature is always enabled and requires no configuration. + +### Features + +- **Session Management**: Create, list, and delete chat sessions +- **Message History**: Store and retrieve conversation history +- **Citation Tracking**: Track source documents and citations for each response +- **User Isolation**: Sessions are isolated per user +- **Always Available**: Automatically enabled when PostgreSQL is configured + +### How It Works + +Session history uses the same PostgreSQL instance as LightRAG. Session tables are automatically created in your database - no additional setup required! + +### Docker Deployment + +Session history uses the same PostgreSQL as LightRAG: + +```bash +# Start LightRAG - session tables created automatically +docker compose up -d + +# View logs +docker compose logs -f lightrag +``` + +### API Endpoints + +The session history feature provides the following REST API endpoints: + +- `POST /history/sessions` - Create a new chat session +- `GET /history/sessions` - List all sessions for current user +- `GET /history/sessions/{session_id}/history` - Get message history for a session +- `DELETE /history/sessions/{session_id}` - Delete a session and its messages + +### Example Usage + +```python +import requests + +# Create a new session +response = requests.post( + "http://localhost:9621/history/sessions", + json={"title": "My Research Session"}, + headers={"X-User-ID": "user123"} +) +session_id = response.json()["id"] + +# Query with session context +response = requests.post( + "http://localhost:9621/query", + json={ + "query": "What are the main findings?", + "mode": "hybrid", 
+ "session_id": session_id + } +) + +# Get session history +response = requests.get( + f"http://localhost:9621/history/sessions/{session_id}/history" +) +messages = response.json() +``` + +### Troubleshooting + +If session history endpoints are not available, check: +1. PostgreSQL is running and accessible +2. `POSTGRES_*` environment variables are correctly configured +3. Server logs for initialization errors + ## Graph Visualization The LightRAG Server offers a comprehensive knowledge graph visualization feature. It supports various gravity layouts, node queries, subgraph filtering, and more. **For more information about LightRAG Server, please refer to [LightRAG Server](./lightrag/api/README.md).** diff --git a/SESSION_ALWAYS_ON.md b/SESSION_ALWAYS_ON.md new file mode 100644 index 00000000..4844f705 --- /dev/null +++ b/SESSION_ALWAYS_ON.md @@ -0,0 +1,272 @@ +# Session History: Always-On Feature + +## Final Simplification + +Based on user feedback, we've removed the `SESSION_HISTORY_ENABLED` variable completely. Session history is now **always enabled** as a core feature of LightRAG Server. + +## Rationale + +### Why Remove the Toggle? + +1. **It's Always Useful**: Session history is a fundamental feature for chat applications +2. **No Overhead**: If you don't use it, it doesn't impact performance +3. **Graceful Degradation**: If PostgreSQL fails, server still starts (endpoints just unavailable) +4. **Simpler UX**: One less thing for users to configure +5. **Modern Default**: Chat history should be expected, not optional + +### What Changed + +#### Before (With Toggle) +```bash +SESSION_HISTORY_ENABLED=true # Required this line +``` + +#### After (Always On) +```bash +# Nothing needed! Session history just works +``` + +## How It Works Now + +### Automatic Initialization + +When LightRAG Server starts: + +1. ✅ Reads `POSTGRES_*` environment variables +2. ✅ Connects to PostgreSQL +3. ✅ Creates session tables automatically (if they don't exist) +4. 
✅ Enables `/history/*` endpoints +5. ✅ Ready to use! + +### Graceful Failure + +If PostgreSQL is not available: + +``` +WARNING: Session history initialization failed: connection refused +WARNING: Session history endpoints will be unavailable +INFO: Server is ready to accept connections! 🚀 +``` + +- ✅ Server still starts +- ✅ Other features work normally +- ✅ Session endpoints are not registered (requests return 404) +- ✅ No crash or hard failure + +## Configuration + +### Complete Setup + +```bash +# File: .env +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 +POSTGRES_USER=postgres +POSTGRES_PASSWORD=your_password +POSTGRES_DATABASE=lightrag_db + +# That's it! Session history automatically enabled +``` + +### No PostgreSQL? + +If you don't have PostgreSQL: +- LightRAG Server will start normally +- Session endpoints won't be available +- All other features work as expected +- Check logs for: "Session history endpoints will be unavailable" + +## Benefits + +### For Users + +1. ✅ **Zero Configuration**: No ENV variable to set +2. ✅ **Just Works**: Automatic if PostgreSQL is available +3. ✅ **No Surprises**: Consistent behavior +4. ✅ **Less Confusion**: No "should I enable this?" questions + +### For Developers + +1. ✅ **Cleaner Code**: No conditional logic for enable/disable +2. ✅ **Simpler Tests**: Always test with feature enabled +3. ✅ **Better UX**: Feature discovery through API docs +4. 
✅ **Modern Architecture**: Features are on by default + +## Migration + +### From `SESSION_HISTORY_ENABLED=true` + +Simply remove the line from your `.env`: + +```bash +# Remove this line +# SESSION_HISTORY_ENABLED=true + +# Everything else stays the same +``` + +### From `SESSION_HISTORY_ENABLED=false` + +If you had it disabled: + +```bash +# Remove this line +# SESSION_HISTORY_ENABLED=false + +# Session history will now be available +# Just don't use the endpoints if you don't need them +``` + +## API Endpoints + +Always available (when PostgreSQL is configured): + +``` +POST /history/sessions - Create session +GET /history/sessions - List sessions +GET /history/sessions/{id}/history - Get messages +DELETE /history/sessions/{id} - Delete session +``` + +## Database Tables + +Automatically created in `POSTGRES_DATABASE`: + +- `lightrag_chat_sessions_history` +- `lightrag_chat_messages_history` +- `lightrag_message_citations_history` + +## Use Cases + +### Development +```bash +# Just configure PostgreSQL +POSTGRES_HOST=localhost +POSTGRES_DATABASE=dev_lightrag + +# Session history automatically available! +``` + +### Production +```bash +# Production database +POSTGRES_HOST=prod-db.example.com +POSTGRES_DATABASE=lightrag_prod + +# Session history automatically available! 
+``` + +### Testing Without Sessions +```bash +# Don't configure PostgreSQL +# Or use SQLite for other storage + +# Session endpoints are not registered (404) +# Rest of LightRAG works fine +``` + +## Implementation + +### Server Initialization + +```python +# In lightrag_server.py +app = FastAPI(**app_kwargs) + +# Initialize session history - always attempt +try: + session_db_manager = get_session_db_manager() + app.include_router(history_router) + logger.info("Session history initialized") +except Exception as e: + logger.warning(f"Session history unavailable: {e}") + # Server continues normally +``` + +### Key Points + +- ✅ No `if SESSION_HISTORY_ENABLED` checks +- ✅ Try to initialize, log warning if fails +- ✅ Server continues regardless +- ✅ Clean and simple + +## Philosophy + +### Modern Software Defaults + +Good software should: +1. **Work out of the box** - Session history just works +2. **Fail gracefully** - Server starts even if sessions fail +3. **Be discoverable** - Feature is in API docs by default +4. **Require minimal config** - Use existing PostgreSQL + +### KISS Principle + +- ❌ Before: "Do I need session history? Should I enable it?" +- ✅ After: "It's there if I need it!" + +### Progressive Enhancement + +- Basic: LightRAG without PostgreSQL +- Enhanced: LightRAG with PostgreSQL + Session History +- No configuration needed to progress! + +## Summary + +| Aspect | Before | After | +|--------|--------|-------| +| **Configuration** | `SESSION_HISTORY_ENABLED=true` | Nothing needed | +| **If PostgreSQL available** | Enabled | Enabled | +| **If PostgreSQL unavailable** | Disabled | Graceful warning | +| **User decision needed** | Yes | No | +| **Code complexity** | Conditional logic | Always attempt | + +## Quote from User + +> "Biến này lúc nào cũng = true thì cần gì nữa, xóa luôn" + +**Exactly right!** If it's always `true`, why have it at all? + +Session history is now a **first-class citizen** of LightRAG Server - always available, no questions asked! 
🎉 + +--- + +## Technical Notes + +### Database Connection + +Uses the standard SQLAlchemy pattern: + +```python +class SessionDatabaseConfig: + def __init__(self): + self.host = os.getenv("POSTGRES_HOST", "localhost") + self.port = os.getenv("POSTGRES_PORT", "5432") + # ... etc +``` + +No special handling, no overrides, no complexity. + +### Graceful Degradation + +Exception handling ensures server resilience: + +```python +try: + session_db_manager = get_session_db_manager() + app.include_router(history_router) +except Exception as e: + logger.warning(f"Session history unavailable: {e}") + # Server continues +``` + +### Zero Impact + +If session endpoints aren't used: +- ✅ No queries to database +- ✅ No performance overhead +- ✅ No resource consumption +- ✅ Just available when needed + +Perfect! 🎯 + diff --git a/SESSION_CONFIG_SIMPLIFIED.md b/SESSION_CONFIG_SIMPLIFIED.md new file mode 100644 index 00000000..b87dfaf2 --- /dev/null +++ b/SESSION_CONFIG_SIMPLIFIED.md @@ -0,0 +1,202 @@ +# Session History Configuration - Simplified Approach + +## Summary of Changes + +Based on user feedback, the session history configuration has been **simplified** to avoid unnecessary complexity. 
+ +## What Changed + +### Before (Over-complicated) +```bash +# Required separate PostgreSQL configuration +SESSION_POSTGRES_HOST=localhost +SESSION_POSTGRES_PORT=5433 +SESSION_POSTGRES_USER=lightrag +SESSION_POSTGRES_PASSWORD=lightrag_password +SESSION_POSTGRES_DATABASE=lightrag_sessions +``` +- ❌ Required users to configure separate database +- ❌ More environment variables to manage +- ❌ Confusion about when to use which settings + +### After (Simplified) +```bash +# Just enable - uses existing PostgreSQL automatically +SESSION_HISTORY_ENABLED=true +``` +- ✅ Uses existing `POSTGRES_*` configuration by default +- ✅ Minimal configuration needed +- ✅ Session tables created in same database as LightRAG +- ✅ Still allows separate database if needed (optional) + +## Configuration Logic + +The system now follows this priority order: + +1. **`SESSION_DATABASE_URL`** (if set) - Full custom connection string +2. **`SESSION_POSTGRES_*`** (if set) - Override for separate database +3. **`POSTGRES_*`** (default) - Shared with LightRAG ✨ **RECOMMENDED** + +## Use Cases + +### 99% of Users (Recommended) +```bash +# In .env - just enable it! +SESSION_HISTORY_ENABLED=true + +# Session tables will be created in POSTGRES_DATABASE automatically +# No additional configuration needed +``` + +**Result**: +- Session tables: `lightrag_chat_sessions_history`, `lightrag_chat_messages_history`, `lightrag_message_citations_history` +- Created in the same PostgreSQL database as LightRAG storage +- Uses existing PostgreSQL connection settings + +### Advanced Users (Separate Database) +```bash +SESSION_HISTORY_ENABLED=true + +# Only if you REALLY need separate database +SESSION_POSTGRES_HOST=other-host +SESSION_POSTGRES_DATABASE=dedicated_sessions_db +``` + +## Docker Compose Changes + +### Simplified (Default) +```yaml +services: + lightrag: + # ... existing config + # No session-db dependency needed! 
+``` + +The separate `session-db` service is now **commented out** in `docker-compose.yml` since most users don't need it. + +### If You Need Separate Database +Uncomment the `session-db` service in `docker-compose.yml`. + +## Benefits + +1. **Simpler Setup**: One less thing to configure +2. **Fewer ENV Variables**: Less confusion about what to set +3. **Easier Docker**: No need for separate database container in most cases +4. **Better Defaults**: Works out of the box with existing PostgreSQL +5. **Still Flexible**: Can override if needed for advanced use cases + +## Migration from Old Config + +If you already have `SESSION_POSTGRES_*` set in your `.env`: + +**Option 1: Simplify (Recommended)** +```bash +# Remove these lines from .env +# SESSION_POSTGRES_HOST=... +# SESSION_POSTGRES_PORT=... +# SESSION_POSTGRES_USER=... +# SESSION_POSTGRES_PASSWORD=... +# SESSION_POSTGRES_DATABASE=... + +# Keep only this +SESSION_HISTORY_ENABLED=true +``` + +**Option 2: Keep Separate Database** +```bash +# Keep your SESSION_POSTGRES_* settings if you need separate database +SESSION_HISTORY_ENABLED=true +SESSION_POSTGRES_HOST=other-host +# ... other settings +``` + +## Database Tables + +Whether you use shared or separate PostgreSQL, these tables are created: + +| Table | Purpose | +|-------|---------| +| `lightrag_chat_sessions_history` | Chat sessions | +| `lightrag_chat_messages_history` | Individual messages | +| `lightrag_message_citations_history` | Source citations | + +## Why This Makes Sense + +1. **Most users have ONE PostgreSQL instance** - No need to run multiple +2. **Session data is not that large** - Doesn't need separate database +3. **Simpler is better** - Follows principle of least configuration +4. 
**Still allows separation** - When needed for production/security reasons + +## Example Scenarios + +### Scenario 1: Development/Testing +```bash +# .env +POSTGRES_HOST=localhost +POSTGRES_DATABASE=lightrag_dev +SESSION_HISTORY_ENABLED=true +``` +✅ Everything in one database, easy to reset/cleanup + +### Scenario 2: Production (Simple) +```bash +# .env +POSTGRES_HOST=prod-db.example.com +POSTGRES_DATABASE=lightrag_prod +SESSION_HISTORY_ENABLED=true +``` +✅ Production database with both LightRAG and session data + +### Scenario 3: Production (Separated) +```bash +# .env +POSTGRES_HOST=prod-db.example.com +POSTGRES_DATABASE=lightrag_data + +SESSION_POSTGRES_HOST=sessions-db.example.com +SESSION_POSTGRES_DATABASE=sessions +``` +✅ Separate databases for data isolation (if required by architecture) + +## Implementation Details + +The fallback logic in `session_database.py`: + +```python +# Uses 'or' instead of nested getenv for clarity +self.host = os.getenv("SESSION_POSTGRES_HOST") or os.getenv("POSTGRES_HOST", "localhost") +self.port = os.getenv("SESSION_POSTGRES_PORT") or os.getenv("POSTGRES_PORT", "5432") +# ... etc +``` + +This means: +- If `SESSION_POSTGRES_HOST` is set → use it +- If not set or empty → fallback to `POSTGRES_HOST` +- If that's also not set → use default "localhost" + +## Logging + +The system logs which configuration is being used: + +``` +INFO: Session database: shared with LightRAG at localhost:5432/lightrag_db +``` +or +``` +INFO: Session database: separate instance at sessions-host:5433/sessions_db +``` +or +``` +INFO: Session database: custom URL +``` + +## Conclusion + +By defaulting to shared PostgreSQL configuration, we've made session history: +- ✅ Easier to set up +- ✅ Less confusing +- ✅ More intuitive +- ✅ Still flexible when needed + +**Bottom line**: Just set `SESSION_HISTORY_ENABLED=true` and you're done! 
🎉 + diff --git a/SESSION_FINAL_SIMPLIFICATION.md b/SESSION_FINAL_SIMPLIFICATION.md new file mode 100644 index 00000000..6a776866 --- /dev/null +++ b/SESSION_FINAL_SIMPLIFICATION.md @@ -0,0 +1,169 @@ +# Session History - Final Simplification + +## What Changed + +Based on user feedback, we've completely removed `SESSION_POSTGRES_*` variables and simplified to use only the existing `POSTGRES_*` configuration. + +## Before vs After + +### ❌ Before (Too Complex) +```bash +SESSION_POSTGRES_HOST=localhost +SESSION_POSTGRES_PORT=5433 +SESSION_POSTGRES_USER=lightrag +SESSION_POSTGRES_PASSWORD=lightrag_password +SESSION_POSTGRES_DATABASE=lightrag_sessions +``` + +### ✅ After (Simple!) +```bash +# Just enable it! +SESSION_HISTORY_ENABLED=true + +# That's it! Uses existing POSTGRES_* automatically +``` + +## Configuration + +Session history now **always** uses the same PostgreSQL as LightRAG: + +```bash +# Your existing LightRAG configuration +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 +POSTGRES_USER=postgres +POSTGRES_PASSWORD=your_password +POSTGRES_DATABASE=lightrag_db + +# Enable session history - no additional config needed! +SESSION_HISTORY_ENABLED=true +``` + +## Database Tables + +These tables will be created in your `POSTGRES_DATABASE`: + +- `lightrag_chat_sessions_history` +- `lightrag_chat_messages_history` +- `lightrag_message_citations_history` + +All in the **same database** as your LightRAG data. Clean and simple! + +## Docker Compose + +No separate database container needed: + +```yaml +services: + lightrag: + # ... your existing config + # Session history uses same PostgreSQL +``` + +## Benefits + +1. ✅ **Zero additional configuration** +2. ✅ **No confusion about which ENV to use** +3. ✅ **One PostgreSQL instance** +4. ✅ **Easier to manage** +5. ✅ **Simpler docker setup** + +## Migration + +If you had `SESSION_POSTGRES_*` in your `.env`, just remove them: + +```bash +# Remove these lines (no longer used) +# SESSION_POSTGRES_HOST=... 
+# SESSION_POSTGRES_PORT=... +# SESSION_POSTGRES_USER=... +# SESSION_POSTGRES_PASSWORD=... +# SESSION_POSTGRES_DATABASE=... + +# Keep only this +SESSION_HISTORY_ENABLED=true +``` + +## Code Changes + +### `session_database.py` +- Removed all `SESSION_POSTGRES_*` references +- Uses `POSTGRES_*` directly +- Cleaner, simpler code + +### `env.example` +- Removed all `SESSION_POSTGRES_*` variables +- Single line: `SESSION_HISTORY_ENABLED=true` + +### `docker-compose.yml` +- Removed separate `session-db` service +- No volumes needed for separate session DB + +## Why This Makes Sense + +1. **Single Source of Truth**: One set of database credentials +2. **No Duplication**: Don't repeat POSTGRES_* with different names +3. **KISS Principle**: Keep It Simple, Stupid +4. **User Feedback**: Based on actual user needs + +## Use Cases + +### Development +```bash +POSTGRES_HOST=localhost +POSTGRES_DATABASE=dev_lightrag +SESSION_HISTORY_ENABLED=true +``` +✅ Everything in one place + +### Production +```bash +POSTGRES_HOST=prod-db.example.com +POSTGRES_DATABASE=lightrag_prod +SESSION_HISTORY_ENABLED=true +``` +✅ Production-ready with minimal config + +### Testing +```bash +POSTGRES_HOST=localhost +POSTGRES_DATABASE=test_lightrag +SESSION_HISTORY_ENABLED=false +``` +✅ Easy to disable when not needed + +## What If I Need Separate Database? + +If you **really** need a separate database for sessions (rare case), you can: + +1. Use a different `POSTGRES_DATABASE` name in Docker Compose +2. Or modify `session_database.py` locally for your needs + +But honestly, for 99% of use cases, same database is fine! + +## Summary + +**Before**: Confusing with multiple ENV variables for the same thing +**After**: One line to enable, uses existing configuration + +That's the power of simplicity! 
🎉 + +--- + +## Technical Details + +The `SessionDatabaseConfig` class now simply reads `POSTGRES_*`: + +```python +class SessionDatabaseConfig: + def __init__(self): + self.host = os.getenv("POSTGRES_HOST", "localhost") + self.port = os.getenv("POSTGRES_PORT", "5432") + self.user = os.getenv("POSTGRES_USER", "postgres") + self.password = os.getenv("POSTGRES_PASSWORD", "password") + self.database = os.getenv("POSTGRES_DATABASE", "lightrag_db") + # ... build connection string +``` + +No fallbacks, no overrides, no confusion. Just works! ✨ + diff --git a/SESSION_INTEGRATION_SUMMARY.md b/SESSION_INTEGRATION_SUMMARY.md new file mode 100644 index 00000000..fddb2fe9 --- /dev/null +++ b/SESSION_INTEGRATION_SUMMARY.md @@ -0,0 +1,259 @@ +# Session History Integration Summary + +## Overview + +The session history feature has been successfully integrated from the standalone `service/` folder into the main LightRAG codebase. This document provides a summary of all changes made. + +## Changes Made + +### 1. New Files Created + +#### Core Session History Modules (`lightrag/api/`) +- `session_models.py` - SQLAlchemy database models for sessions, messages, and citations +- `session_schemas.py` - Pydantic schemas for API request/response validation +- `session_database.py` - Database configuration and connection management +- `session_manager.py` - Business logic for session operations + +#### Updated Files +- `lightrag/api/routers/history_routes.py` - Updated to use new integrated modules +- `lightrag/api/lightrag_server.py` - Added session database initialization + +### 2. 
Configuration Files Updated + +#### `docker-compose.yml` +- Added `session-db` service (PostgreSQL 16) +- Configured volume for persistent session data +- Added health checks for database availability +- Set up proper service dependencies + +#### `env.example` +- Added `SESSION_HISTORY_ENABLED` flag +- Added `SESSION_POSTGRES_*` configuration variables +- Included fallback to main `POSTGRES_*` settings + +#### `README.md` +- Added comprehensive "Session History Feature" section +- Documented configuration options +- Provided Docker deployment instructions +- Added API endpoint examples +- Included usage examples + +### 3. Documentation + +#### New Documents +- `docs/SessionHistoryMigration.md` - Complete migration guide + - Step-by-step migration instructions + - Configuration reference + - Troubleshooting section + - API examples + +- `scripts/migrate_session_history.sh` - Automated migration script + - Checks and updates `.env` configuration + - Handles backup of old `service/` folder + - Tests database connectivity + - Provides next steps + +## Architecture Changes + +### Before (Standalone Service) +``` +service/ +├── main.py # Separate FastAPI app +├── app/ +│ ├── core/ +│ │ ├── config.py # Separate configuration +│ │ └── database.py # Separate DB management +│ ├── models/ +│ │ ├── models.py # SQLAlchemy models +│ │ └── schemas.py # Pydantic schemas +│ ├── services/ +│ │ ├── history_manager.py # Business logic +│ │ └── lightrag_wrapper.py +│ └── api/ +│ └── routes.py # API endpoints +``` + +### After (Integrated) +``` +lightrag/ +└── api/ + ├── session_models.py # SQLAlchemy models + ├── session_schemas.py # Pydantic schemas + ├── session_database.py # DB management + ├── session_manager.py # Business logic + ├── lightrag_server.py # Main server (updated) + └── routers/ + └── history_routes.py # API endpoints (updated) +``` + +## Key Features + +### 1. 
Automatic Initialization +- Session database is automatically initialized when LightRAG Server starts +- Graceful degradation if database is unavailable +- Tables are created automatically on first run + +### 2. Unified Configuration +- All configuration through main `.env` file +- Fallback to main PostgreSQL settings if session-specific settings not provided +- Easy enable/disable via `SESSION_HISTORY_ENABLED` flag + +### 3. Docker Integration +- PostgreSQL container automatically configured in `docker-compose.yml` +- Persistent volumes for data retention +- Health checks for reliability +- Proper service dependencies + +### 4. API Consistency +- Session endpoints follow LightRAG API conventions +- Proper authentication headers (`X-User-ID`) +- RESTful endpoint design +- Comprehensive error handling + +## API Endpoints + +All session history endpoints are now under the `/history` prefix: + +| Method | Endpoint | Description | +|--------|----------|-------------| +| POST | `/history/sessions` | Create a new chat session | +| GET | `/history/sessions` | List all sessions for user | +| GET | `/history/sessions/{id}/history` | Get message history | +| DELETE | `/history/sessions/{id}` | Delete session and messages | + +## Migration Path + +### For New Installations +1. Copy `env.example` to `.env` +2. Configure `SESSION_POSTGRES_*` variables +3. Run `docker compose up -d` (if using Docker) +4. Start LightRAG server: `lightrag-server` + +### For Existing Installations with service/ +1. Run migration script: `bash scripts/migrate_session_history.sh` +2. Update `.env` with session configuration +3. Restart LightRAG server +4. Test session endpoints +5. 
Backup and remove old `service/` folder (optional) + +## Configuration Examples + +### Minimal Configuration (Uses Defaults) +```bash +SESSION_HISTORY_ENABLED=true +``` + +### Full Configuration +```bash +SESSION_HISTORY_ENABLED=true +SESSION_POSTGRES_HOST=localhost +SESSION_POSTGRES_PORT=5433 +SESSION_POSTGRES_USER=lightrag +SESSION_POSTGRES_PASSWORD=secure_password +SESSION_POSTGRES_DATABASE=lightrag_sessions +``` + +### Using Main PostgreSQL Instance +```bash +SESSION_HISTORY_ENABLED=true +# Session will use main POSTGRES_* settings +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 +POSTGRES_USER=postgres +POSTGRES_PASSWORD=password +POSTGRES_DATABASE=lightrag_db +``` + +### Disabled Session History +```bash +SESSION_HISTORY_ENABLED=false +# No PostgreSQL required for session history +``` + +## Testing + +### Manual Testing +```bash +# Create a session +curl -X POST http://localhost:9621/history/sessions \ + -H "Content-Type: application/json" \ + -H "X-User-ID: test@example.com" \ + -d '{"title": "Test Session"}' + +# List sessions +curl http://localhost:9621/history/sessions \ + -H "X-User-ID: test@example.com" + +# Get session history +curl http://localhost:9621/history/sessions/{session_id}/history +``` + +### Docker Testing +```bash +# Start all services +docker compose up -d + +# Check logs +docker compose logs -f lightrag session-db + +# Verify database +docker exec -it lightrag-session-db psql -U lightrag -d lightrag_sessions -c '\dt' +``` + +## Dependencies + +All required dependencies are already included in `pyproject.toml`: +- `sqlalchemy` - ORM for database operations +- `psycopg2-binary` - PostgreSQL driver +- `fastapi` - Web framework +- `pydantic` - Data validation + +## Next Steps + +### Cleanup (Optional) +After successful migration and testing: +```bash +# Backup old service folder +mv service service.backup.$(date +%Y%m%d) + +# Or remove completely +rm -rf service +``` + +### Monitoring +- Check server logs for session initialization messages +- 
Monitor PostgreSQL connections +- Review session creation and query performance + +### Customization +- Modify session models in `session_models.py` +- Extend API endpoints in `routers/history_routes.py` +- Add custom business logic in `session_manager.py` + +## Rollback Plan + +If needed, to rollback to standalone service: +1. Restore `service/` folder from backup +2. Remove session configuration from `.env` +3. Revert changes to `docker-compose.yml` +4. Restart services + +## Support + +For issues or questions: +- Review `docs/SessionHistoryMigration.md` +- Check LightRAG documentation +- Open an issue on GitHub + +## Conclusion + +The session history feature is now fully integrated into LightRAG as a first-class feature. The integration provides: +- ✅ Easier setup and configuration +- ✅ Better maintainability +- ✅ Unified Docker deployment +- ✅ Consistent API design +- ✅ Comprehensive documentation +- ✅ Automated migration tools + +The old `service/` folder can now be safely removed or kept as backup. + diff --git a/docker-compose.yml b/docker-compose.yml index e967198a..df7f2bd5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,4 +19,4 @@ services: - .env restart: unless-stopped extra_hosts: - - "host.docker.internal:host-gateway" + - "host.docker.internal:host-gateway" \ No newline at end of file diff --git a/docs/SessionHistoryMigration.md b/docs/SessionHistoryMigration.md new file mode 100644 index 00000000..0b4a9a14 --- /dev/null +++ b/docs/SessionHistoryMigration.md @@ -0,0 +1,193 @@ +# Session History Migration Guide + +## Overview + +The session history functionality has been migrated from the standalone `service/` folder into the main LightRAG codebase as an integrated feature. This document explains the changes and migration steps. 
+ +## What Changed + +### Before (Standalone Service) + +- Session history was implemented as a separate service in the `service/` folder +- Required manual setup and configuration +- Separate database connections and initialization +- Required adding service path to sys.path + +### After (Integrated Feature) + +- Session history is now a built-in feature of LightRAG Server +- Automatically initialized when LightRAG Server starts +- Unified configuration through `.env` file +- Native integration with LightRAG API + +## Migration Steps + +### 1. Update Dependencies + +The session history feature requires SQLAlchemy and PostgreSQL driver: + +```bash +# Using uv (recommended) +uv pip install sqlalchemy psycopg2-binary + +# Or using pip +pip install sqlalchemy psycopg2-binary +``` + +### 2. Update Configuration + +Move your session database configuration to the main `.env` file: + +```bash +# Enable session history feature +SESSION_HISTORY_ENABLED=true + +# PostgreSQL configuration for session history +SESSION_POSTGRES_HOST=localhost +SESSION_POSTGRES_PORT=5433 +SESSION_POSTGRES_USER=lightrag +SESSION_POSTGRES_PASSWORD=lightrag_password +SESSION_POSTGRES_DATABASE=lightrag_sessions +``` + +### 3. Update Docker Compose (if using Docker) + +The new `docker-compose.yml` includes PostgreSQL service automatically: + +```bash +# Stop existing services +docker compose down + +# Pull/build new images +docker compose pull +docker compose build + +# Start all services +docker compose up -d +``` + +### 4. API Endpoints + +Session history endpoints are under the `/history` prefix: + +``` +POST /history/sessions - Create session +GET /history/sessions - List sessions +GET /history/sessions/{id}/history - Get messages +DELETE /history/sessions/{id} - Delete session +``` + +### 5. 
Remove Old Service Folder + +Once migration is complete and tested, you can safely remove the old `service/` folder: + +```bash +# Backup first (optional) +mv service service.backup + +# Or remove directly +rm -rf service +``` + +## New Features + +The integrated session history includes several improvements: + +1. **Automatic Initialization**: Session database is automatically initialized on server startup +2. **Graceful Degradation**: If session database is unavailable, server still starts (without history features) +3. **Better Error Handling**: Improved error messages and logging +4. **User Isolation**: Proper user ID handling via `X-User-ID` header +5. **Session Deletion**: New endpoint to delete sessions and messages + +## Configuration Reference + +### Configuration + +Session history is **always enabled** and uses the same PostgreSQL as LightRAG: + +- No environment variables needed +- Session tables created automatically in `POSTGRES_DATABASE` +- Works out of the box when PostgreSQL is configured + +That's it - zero configuration! + +## Troubleshooting + +### Session history not available + +**Symptom**: `/history/sessions` endpoints return 404 + +**Solution**: +1. Check that the `POSTGRES_*` variables in `.env` are set correctly +2. Verify PostgreSQL is running and accessible +3. Check server logs for initialization errors + +### Database connection errors + +**Symptom**: Server starts but session endpoints fail with database errors + +**Solution**: +1. Verify PostgreSQL credentials in `.env` +2. Ensure PostgreSQL is accessible from your network +3. Check PostgreSQL logs for connection issues +4. For Docker: ensure the PostgreSQL container is running + +### Migration from old service + +**Symptom**: Want to preserve existing session data + +**Solution**: +The database schema is compatible. Point the `POSTGRES_*` settings at your existing PostgreSQL database and the tables will be reused. 
+ +## API Examples + +### Create a Session + +```python +import requests + +response = requests.post( + "http://localhost:9621/history/sessions", + json={"title": "Research Session"}, + headers={"X-User-ID": "user@example.com"} +) +print(response.json()) +``` + +### List Sessions + +```python +response = requests.get( + "http://localhost:9621/history/sessions", + headers={"X-User-ID": "user@example.com"} +) +print(response.json()) +``` + +### Get Session History + +```python +session_id = "..." # UUID from create session +response = requests.get( + f"http://localhost:9621/history/sessions/{session_id}/history" +) +print(response.json()) +``` + +### Delete Session + +```python +response = requests.delete( + f"http://localhost:9621/history/sessions/{session_id}", + headers={"X-User-ID": "user@example.com"} +) +print(response.status_code) # 204 on success +``` + +## Support + +For issues or questions: +- Check the main [README.md](../README.md) +- Review [LightRAG Server documentation](../lightrag/api/README.md) +- Open an issue on [GitHub](https://github.com/HKUDS/LightRAG/issues) + diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index c7565bd5..4535c6b2 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -406,7 +406,17 @@ def create_app(args): } app = FastAPI(**app_kwargs) - app.include_router(history_router) + + # Initialize session history database + try: + from lightrag.api.session_database import get_session_db_manager + logger.info("Initializing session history database...") + session_db_manager = get_session_db_manager() + logger.info("Session history database initialized successfully") + app.include_router(history_router) + except Exception as e: + logger.warning(f"Session history initialization failed: {e}") + logger.warning("Session history endpoints will be unavailable. 
Check PostgreSQL configuration.") # Add custom validation error handler for /query/data endpoint @app.exception_handler(RequestValidationError) diff --git a/lightrag/api/routers/history_routes.py b/lightrag/api/routers/history_routes.py index e32ec9db..4f138fa8 100644 --- a/lightrag/api/routers/history_routes.py +++ b/lightrag/api/routers/history_routes.py @@ -1,72 +1,154 @@ -from fastapi import APIRouter, Depends, HTTPException, Header +""" +Session History Routes for LightRAG API + +This module provides REST API endpoints for managing chat sessions +and conversation history. +""" + +from fastapi import APIRouter, Depends, HTTPException, Header, status from sqlalchemy.orm import Session from typing import List, Optional from uuid import UUID -import sys -import os +import time -# Ensure service module is in path (similar to query_routes.py) -project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")) -service_dir = os.path.join(project_root, "service") -if service_dir not in sys.path: - sys.path.append(service_dir) +from lightrag.api.session_database import get_db +from lightrag.api.session_manager import SessionHistoryManager +from lightrag.api.session_schemas import ( + SessionResponse, + SessionCreate, + ChatMessageResponse, + ChatMessageRequest, +) +from lightrag.utils import logger -try: - from app.core.database import get_db - from app.services.history_manager import HistoryManager - from app.models.schemas import SessionResponse, SessionCreate, ChatMessageResponse -except ImportError: - # Fallback if service not found (shouldn't happen if setup is correct) - get_db = None - HistoryManager = None - SessionResponse = None - SessionCreate = None - ChatMessageResponse = None +router = APIRouter(prefix="/history", tags=["Session History"]) -router = APIRouter() - -def check_dependencies(): - if not HistoryManager: - raise HTTPException(status_code=503, detail="History service not available") async def get_current_user_id( x_user_id: 
Optional[str] = Header(None, alias="X-User-ID") ) -> str: - # Prefer X-User-ID, default to default_user - uid = x_user_id - if not uid: - # Fallback to default user if no header provided (for backward compatibility or dev) - # Or raise error if strict - return "default_user" - return uid + """ + Extract user ID from request header. + + Args: + x_user_id: User ID from X-User-ID header. + + Returns: + User ID string, defaults to 'default_user' if not provided. + """ + return x_user_id or "default_user" -@router.get("/sessions", response_model=List[SessionResponse], tags=["History"]) -def list_sessions( - skip: int = 0, - limit: int = 20, + +@router.get("/sessions", response_model=List[SessionResponse]) +async def list_sessions( + skip: int = 0, + limit: int = 20, db: Session = Depends(get_db), - current_user_id: str = Depends(get_current_user_id) + current_user_id: str = Depends(get_current_user_id), ): - check_dependencies() - manager = HistoryManager(db) - sessions = manager.list_sessions(user_id=current_user_id, skip=skip, limit=limit) - return sessions + """ + List all chat sessions for the current user. + + Args: + skip: Number of sessions to skip (for pagination). + limit: Maximum number of sessions to return. + db: Database session. + current_user_id: Current user identifier. + + Returns: + List of session response objects. 
+ """ + try: + manager = SessionHistoryManager(db) + sessions = manager.list_sessions(user_id=current_user_id, skip=skip, limit=limit) + return sessions + except Exception as e: + logger.error(f"Error listing sessions: {e}") + raise HTTPException(status_code=500, detail=str(e)) -@router.post("/sessions", response_model=SessionResponse, tags=["History"]) -def create_session( - session_in: SessionCreate, + +@router.post("/sessions", response_model=SessionResponse, status_code=status.HTTP_201_CREATED) +async def create_session( + session_in: SessionCreate, db: Session = Depends(get_db), - current_user_id: str = Depends(get_current_user_id) + current_user_id: str = Depends(get_current_user_id), ): - check_dependencies() - manager = HistoryManager(db) - return manager.create_session(user_id=current_user_id, title=session_in.title) + """ + Create a new chat session. + + Args: + session_in: Session creation request. + db: Database session. + current_user_id: Current user identifier. + + Returns: + Created session response. + """ + try: + manager = SessionHistoryManager(db) + session = manager.create_session( + user_id=current_user_id, + title=session_in.title, + rag_config=session_in.rag_config, + ) + return session + except Exception as e: + logger.error(f"Error creating session: {e}") + raise HTTPException(status_code=500, detail=str(e)) -@router.get("/sessions/{session_id}/history", response_model=List[ChatMessageResponse], tags=["History"]) -def get_session_history( - session_id: str, - db: Session = Depends(get_db) + +@router.get("/sessions/{session_id}/history", response_model=List[ChatMessageResponse]) +async def get_session_history( + session_id: UUID, + db: Session = Depends(get_db), ): - check_dependencies() - manager = HistoryManager(db) - return manager.get_session_history(session_id) + """ + Get all messages for a specific session. + + Args: + session_id: Session UUID. + db: Database session. + + Returns: + List of chat message responses with citations. 
+ """ + try: + manager = SessionHistoryManager(db) + messages = manager.get_session_history(session_id) + return messages + except Exception as e: + logger.error(f"Error getting session history: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.delete("/sessions/{session_id}", status_code=status.HTTP_204_NO_CONTENT) +async def delete_session( + session_id: UUID, + db: Session = Depends(get_db), + current_user_id: str = Depends(get_current_user_id), +): + """ + Delete a chat session and all its messages. + + Args: + session_id: Session UUID. + db: Database session. + current_user_id: Current user identifier. + """ + try: + manager = SessionHistoryManager(db) + + # Verify session belongs to user + session = manager.get_session(session_id) + if not session: + raise HTTPException(status_code=404, detail="Session not found") + + if session.user_id != current_user_id: + raise HTTPException(status_code=403, detail="Not authorized to delete this session") + + manager.delete_session(session_id) + except HTTPException: + raise + except Exception as e: + logger.error(f"Error deleting session: {e}") + raise HTTPException(status_code=500, detail=str(e)) diff --git a/lightrag/api/session_database.py b/lightrag/api/session_database.py new file mode 100644 index 00000000..ab4d36d1 --- /dev/null +++ b/lightrag/api/session_database.py @@ -0,0 +1,163 @@ +""" +Session History Database Configuration and Utilities + +This module provides database connection and session management +for the LightRAG session history feature. +""" + +import os +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from contextlib import contextmanager +from typing import Optional +from lightrag.utils import logger +from urllib.parse import quote_plus + + +class SessionDatabaseConfig: + """ + Configuration for session history database. + + Uses the same PostgreSQL configuration as LightRAG (POSTGRES_* env vars). 
+ Session history tables will be created in the same database as LightRAG data. + """ + + def __init__(self): + """ + Initialize database configuration from environment variables. + + Uses POSTGRES_* variables directly - same database as LightRAG. + """ + self.host = os.getenv("POSTGRES_HOST", "localhost") + self.port = os.getenv("POSTGRES_PORT", "5432") + self.user = os.getenv("POSTGRES_USER", "postgres") + self.password = os.getenv("POSTGRES_PASSWORD", "password") + self.database = os.getenv("POSTGRES_DATABASE", "lightrag_db") + + # Encode credentials to handle special characters + encoded_user = quote_plus(self.user) + encoded_password = quote_plus(self.password) + + self.database_url = f"postgresql://{encoded_user}:{encoded_password}@{self.host}:{self.port}/{self.database}" + + logger.info(f"Session database: {self.host}:{self.port}/{self.database}") + + +class SessionDatabaseManager: + """Manages database connections for session history.""" + + def __init__(self, config: Optional[SessionDatabaseConfig] = None): + """ + Initialize database manager. + + Args: + config: Database configuration. If None, creates default config. + """ + self.config = config or SessionDatabaseConfig() + self.engine = None + self.SessionLocal = None + + def initialize(self): + """Initialize database engine and session factory.""" + if self.engine is not None: + logger.debug("Session database already initialized") + return + + try: + self.engine = create_engine( + self.config.database_url, + pool_pre_ping=True, + pool_size=5, + max_overflow=10 + ) + self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine) + logger.info("Session database initialized successfully") + except Exception as e: + logger.error(f"Failed to initialize session database: {e}") + raise + + def create_tables(self): + """Create all session history tables if they don't exist.""" + if self.engine is None: + raise RuntimeError("Database not initialized. 
Call initialize() first.") + + try: + from lightrag.api.session_models import Base + Base.metadata.create_all(bind=self.engine) + logger.info("Session history tables created/verified") + except Exception as e: + logger.error(f"Failed to create session tables: {e}") + raise + + def get_session(self): + """ + Get a database session. + + Returns: + SQLAlchemy session object. + + Raises: + RuntimeError: If database not initialized. + """ + if self.SessionLocal is None: + raise RuntimeError("Database not initialized. Call initialize() first.") + return self.SessionLocal() + + @contextmanager + def session_scope(self): + """ + Provide a transactional scope for database operations. + + Yields: + Database session that will be committed on success or rolled back on error. + """ + session = self.get_session() + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() + + def close(self): + """Close database connections.""" + if self.engine: + self.engine.dispose() + logger.info("Session database connections closed") + + +# Global database manager instance +_db_manager: Optional[SessionDatabaseManager] = None + + +def get_session_db_manager() -> SessionDatabaseManager: + """ + Get the global session database manager instance. + + Returns: + SessionDatabaseManager instance. + """ + global _db_manager + if _db_manager is None: + _db_manager = SessionDatabaseManager() + _db_manager.initialize() + _db_manager.create_tables() + return _db_manager + + +def get_db(): + """ + Dependency function for FastAPI to get database session. + + Yields: + Database session. 
+ """ + db_manager = get_session_db_manager() + db = db_manager.get_session() + try: + yield db + finally: + db.close() + diff --git a/lightrag/api/session_manager.py b/lightrag/api/session_manager.py new file mode 100644 index 00000000..c2d3c35a --- /dev/null +++ b/lightrag/api/session_manager.py @@ -0,0 +1,226 @@ +""" +Session History Manager for LightRAG API + +This module provides business logic for managing chat sessions, +messages, and citations. +""" + +from sqlalchemy.orm import Session +from lightrag.api.session_models import ChatMessage, ChatSession, MessageCitation +from typing import List, Dict, Optional +import uuid + + +class SessionHistoryManager: + """Manager for chat session history operations.""" + + def __init__(self, db: Session): + """ + Initialize session history manager. + + Args: + db: SQLAlchemy database session. + """ + self.db = db + + def get_conversation_context( + self, + session_id: uuid.UUID, + max_tokens: int = 4000 + ) -> List[Dict[str, str]]: + """ + Retrieve conversation history formatted for LLM context. + + Args: + session_id: Session UUID to retrieve messages from. + max_tokens: Maximum number of tokens to include. + + Returns: + List of message dictionaries with 'role' and 'content' keys. + """ + # Get latest messages first + raw_messages = ( + self.db.query(ChatMessage) + .filter(ChatMessage.session_id == session_id) + .order_by(ChatMessage.created_at.desc()) + .limit(20) # Safe buffer + .all() + ) + + context = [] + current_tokens = 0 + + for msg in raw_messages: + # Simple token estimation (approx 4 chars per token) + msg_tokens = msg.token_count or len(msg.content) // 4 + if current_tokens + msg_tokens > max_tokens: + break + + context.append({"role": msg.role, "content": msg.content}) + current_tokens += msg_tokens + + return list(reversed(context)) + + def create_session( + self, + user_id: str, + title: str = None, + rag_config: dict = None + ) -> ChatSession: + """ + Create a new chat session. 
+ + Args: + user_id: User identifier. + title: Optional session title. + rag_config: Optional RAG configuration dictionary. + + Returns: + Created ChatSession instance. + """ + session = ChatSession( + user_id=user_id, + title=title, + rag_config=rag_config or {} + ) + self.db.add(session) + self.db.commit() + self.db.refresh(session) + return session + + def get_session(self, session_id: uuid.UUID) -> Optional[ChatSession]: + """ + Get a session by ID. + + Args: + session_id: Session UUID. + + Returns: + ChatSession instance or None if not found. + """ + return self.db.query(ChatSession).filter(ChatSession.id == session_id).first() + + def list_sessions( + self, + user_id: str, + skip: int = 0, + limit: int = 100 + ) -> List[ChatSession]: + """ + List sessions for a user. + + Args: + user_id: User identifier. + skip: Number of sessions to skip. + limit: Maximum number of sessions to return. + + Returns: + List of ChatSession instances. + """ + return ( + self.db.query(ChatSession) + .filter(ChatSession.user_id == user_id) + .order_by(ChatSession.last_message_at.desc()) + .offset(skip) + .limit(limit) + .all() + ) + + def save_message( + self, + session_id: uuid.UUID, + role: str, + content: str, + token_count: int = None, + processing_time: float = None + ) -> ChatMessage: + """ + Save a message to a session. + + Args: + session_id: Session UUID. + role: Message role (user, assistant, system). + content: Message content. + token_count: Optional token count. + processing_time: Optional processing time in seconds. + + Returns: + Created ChatMessage instance. 
+ """ + message = ChatMessage( + session_id=session_id, + role=role, + content=content, + token_count=token_count, + processing_time=processing_time + ) + self.db.add(message) + self.db.commit() + self.db.refresh(message) + + # Update session last_message_at + session = self.get_session(session_id) + if session: + session.last_message_at = message.created_at + self.db.commit() + + return message + + def save_citations(self, message_id: uuid.UUID, citations: List[Dict]): + """ + Save citations for a message. + + Args: + message_id: Message UUID. + citations: List of citation dictionaries. + """ + for cit in citations: + # Handle both list and string content + content = cit.get("content", "") + if isinstance(content, list): + content = "\n".join(content) + + citation = MessageCitation( + message_id=message_id, + source_doc_id=cit.get("reference_id", cit.get("source_doc_id", "unknown")), + file_path=cit.get("file_path", "unknown"), + chunk_content=content, + relevance_score=cit.get("relevance_score") + ) + self.db.add(citation) + self.db.commit() + + def get_session_history(self, session_id: uuid.UUID) -> List[ChatMessage]: + """ + Get all messages for a session. + + Args: + session_id: Session UUID. + + Returns: + List of ChatMessage instances ordered by creation time. + """ + return ( + self.db.query(ChatMessage) + .filter(ChatMessage.session_id == session_id) + .order_by(ChatMessage.created_at.asc()) + .all() + ) + + def delete_session(self, session_id: uuid.UUID) -> bool: + """ + Delete a session and all its messages. + + Args: + session_id: Session UUID. + + Returns: + True if session was deleted, False if not found. 
+ """ + session = self.get_session(session_id) + if session: + self.db.delete(session) + self.db.commit() + return True + return False + diff --git a/lightrag/api/session_models.py b/lightrag/api/session_models.py new file mode 100644 index 00000000..f52a5a40 --- /dev/null +++ b/lightrag/api/session_models.py @@ -0,0 +1,65 @@ +""" +Session History Models for LightRAG API + +This module provides database models for storing chat session history, including: +- Chat sessions for organizing conversations +- Chat messages for storing user/assistant interactions +- Message citations for tracking source references +""" + +import uuid +from sqlalchemy import Column, String, Boolean, DateTime, ForeignKey, Text, Integer, Float, JSON +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import relationship, declarative_base +from sqlalchemy.sql import func + +Base = declarative_base() + + +class ChatSession(Base): + """Chat session model for grouping related conversations.""" + + __tablename__ = "lightrag_chat_sessions_history" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + user_id = Column(String(255), nullable=False, index=True) + title = Column(String(255), nullable=True) + rag_config = Column(JSON, default={}) + summary = Column(Text, nullable=True) + last_message_at = Column(DateTime(timezone=True), server_default=func.now(), index=True) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + + messages = relationship("ChatMessage", back_populates="session", cascade="all, delete-orphan") + + +class ChatMessage(Base): + """Chat message model for storing individual messages in a session.""" + + __tablename__ = "lightrag_chat_messages_history" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + session_id = Column(UUID(as_uuid=True), ForeignKey("lightrag_chat_sessions_history.id", ondelete="CASCADE"), nullable=False) + role = Column(String(20), nullable=False) # user, assistant, system + 
content = Column(Text, nullable=False) + token_count = Column(Integer, nullable=True) + processing_time = Column(Float, nullable=True) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + + session = relationship("ChatSession", back_populates="messages") + citations = relationship("MessageCitation", back_populates="message", cascade="all, delete-orphan") + + +class MessageCitation(Base): + """Message citation model for tracking source references.""" + + __tablename__ = "lightrag_message_citations_history" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + message_id = Column(UUID(as_uuid=True), ForeignKey("lightrag_chat_messages_history.id", ondelete="CASCADE"), nullable=False) + source_doc_id = Column(String(255), nullable=False, index=True) + file_path = Column(Text, nullable=False) + chunk_content = Column(Text, nullable=True) + relevance_score = Column(Float, nullable=True) + + message = relationship("ChatMessage", back_populates="citations") + diff --git a/lightrag/api/session_schemas.py b/lightrag/api/session_schemas.py new file mode 100644 index 00000000..749afb2b --- /dev/null +++ b/lightrag/api/session_schemas.py @@ -0,0 +1,65 @@ +""" +Session History Pydantic Schemas for LightRAG API + +This module provides Pydantic schemas for request/response validation +of session history endpoints. 
+""" + +from pydantic import BaseModel, Field +from typing import List, Optional, Dict, Any +from uuid import UUID +from datetime import datetime + + +class SessionCreate(BaseModel): + """Schema for creating a new chat session.""" + + title: Optional[str] = Field(None, description="Optional title for the session") + rag_config: Optional[Dict[str, Any]] = Field(default_factory=dict, description="RAG configuration for this session") + + +class SessionResponse(BaseModel): + """Schema for chat session response.""" + + id: UUID + title: Optional[str] + created_at: datetime + last_message_at: Optional[datetime] + + class Config: + from_attributes = True + + +class ChatMessageRequest(BaseModel): + """Schema for sending a chat message.""" + + session_id: UUID = Field(..., description="Session ID to add message to") + content: str = Field(..., description="Message content") + mode: Optional[str] = Field("hybrid", description="Query mode: local, global, hybrid, naive, mix") + stream: Optional[bool] = Field(False, description="Enable streaming response") + + +class Citation(BaseModel): + """Schema for message citation.""" + + source_doc_id: str + file_path: str + chunk_content: Optional[str] = None + relevance_score: Optional[float] = None + + class Config: + from_attributes = True + + +class ChatMessageResponse(BaseModel): + """Schema for chat message response.""" + + id: UUID + content: str + role: str + created_at: datetime + citations: List[Citation] = Field(default_factory=list) + + class Config: + from_attributes = True + diff --git a/scripts/migrate_session_history.sh b/scripts/migrate_session_history.sh new file mode 100644 index 00000000..bca3f0ea --- /dev/null +++ b/scripts/migrate_session_history.sh @@ -0,0 +1,133 @@ +#!/bin/bash +# Migration script for Session History integration +# This script helps migrate from standalone service/ folder to integrated session history + +set -e # Exit on error + +echo "==========================================" +echo "LightRAG 
Session History Migration Script" +echo "==========================================" +echo "" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Check if .env file exists +if [ ! -f ".env" ]; then + echo -e "${YELLOW}Warning: .env file not found${NC}" + echo "Creating .env from env.example..." + cp env.example .env + echo -e "${GREEN}Created .env file. Please update it with your configuration.${NC}" + echo "" +fi + +# Check if session history config exists in .env +# Session history is now always enabled - no configuration needed! +echo -e "${GREEN}Session history is always enabled by default${NC}" +echo -e "${GREEN}Uses existing POSTGRES_* settings automatically${NC}" +echo "" + +# Check if old service folder exists +if [ -d "service" ]; then + echo -e "${YELLOW}Found old service/ folder${NC}" + echo "Options:" + echo " 1) Backup and remove" + echo " 2) Keep as-is" + echo " 3) Exit" + read -p "Choose option (1-3): " choice + + case $choice in + 1) + backup_name="service.backup.$(date +%Y%m%d_%H%M%S)" + echo "Creating backup: $backup_name" + mv service "$backup_name" + echo -e "${GREEN}Old service folder backed up to $backup_name${NC}" + ;; + 2) + echo "Keeping service/ folder as-is" + ;; + 3) + echo "Exiting..." + exit 0 + ;; + *) + echo -e "${RED}Invalid option${NC}" + exit 1 + ;; + esac + echo "" +fi + +# Check if dependencies are installed +echo "Checking Python dependencies..." +python -c "import sqlalchemy" 2>/dev/null || { + echo -e "${YELLOW}SQLAlchemy not found. Installing...${NC}" + pip install sqlalchemy psycopg2-binary +} +echo -e "${GREEN}Dependencies OK${NC}" +echo "" + +# Test database connection (optional) +echo "Would you like to test the PostgreSQL connection? 
(y/n)" +read -p "Test connection: " test_conn + +if [ "$test_conn" = "y" ] || [ "$test_conn" = "Y" ]; then + # Source .env file to get variables + source .env + + # Use POSTGRES_* variables + PG_HOST=${POSTGRES_HOST:-localhost} + PG_PORT=${POSTGRES_PORT:-5432} + PG_USER=${POSTGRES_USER:-postgres} + PG_PASSWORD=${POSTGRES_PASSWORD:-password} + PG_DB=${POSTGRES_DATABASE:-lightrag} + + echo "Testing connection to PostgreSQL..." + PGPASSWORD=$PG_PASSWORD psql -h $PG_HOST -p $PG_PORT -U $PG_USER -d postgres -c '\q' 2>/dev/null && { + echo -e "${GREEN}PostgreSQL connection successful${NC}" + + # Check if database exists, create if not + PGPASSWORD=$PG_PASSWORD psql -h $PG_HOST -p $PG_PORT -U $PG_USER -d postgres -lqt | cut -d \| -f 1 | grep -qw $PG_DB + if [ $? -eq 0 ]; then + echo -e "${GREEN}Database '$PG_DB' exists${NC}" + else + echo -e "${YELLOW}Database '$PG_DB' does not exist${NC}" + read -p "Create database? (y/n): " create_db + if [ "$create_db" = "y" ] || [ "$create_db" = "Y" ]; then + PGPASSWORD=$PG_PASSWORD psql -h $PG_HOST -p $PG_PORT -U $PG_USER -d postgres -c "CREATE DATABASE $PG_DB;" + echo -e "${GREEN}Database created${NC}" + fi + fi + } || { + echo -e "${RED}Failed to connect to PostgreSQL${NC}" + echo "Please check your database configuration in .env" + } + echo "" +fi + +# Docker-specific instructions +if [ -f "docker-compose.yml" ]; then + echo -e "${GREEN}Docker Compose detected${NC}" + echo "To start all services including session database:" + echo " docker compose up -d" + echo "" + echo "To view logs:" + echo " docker compose logs -f lightrag session-db" + echo "" +fi + +echo "==========================================" +echo "Migration Complete!" +echo "==========================================" +echo "" +echo "Next steps:" +echo "1. Review and update .env configuration" +echo "2. Start LightRAG server: lightrag-server" +echo "3. Test session endpoints at: http://localhost:9621/docs" +echo "4. 
Review migration guide: docs/SessionHistoryMigration.md" +echo "" +echo -e "${GREEN}Happy LightRAGging! 🚀${NC}" +