Merge branch 'main' into multi-embedding-support
This commit is contained in:
commit
b9f109ea7d
7 changed files with 313 additions and 159 deletions
17
README.md
17
README.md
|
|
@ -7,14 +7,13 @@
|
||||||
|
|
||||||
<a href="https://github.com/opensearch-project/OpenSearch"><img src="https://img.shields.io/badge/OpenSearch-005EB8?style=flat&logo=opensearch&logoColor=white" alt="OpenSearch"></a>
|
<a href="https://github.com/opensearch-project/OpenSearch"><img src="https://img.shields.io/badge/OpenSearch-005EB8?style=flat&logo=opensearch&logoColor=white" alt="OpenSearch"></a>
|
||||||
|
|
||||||
<a href="https://github.com/encode/starlette"><img src="https://img.shields.io/badge/Starlette-009639?style=flat&logo=fastapi&logoColor=white" alt="Starlette"></a>
|
<a href="https://github.com/docling-project/docling"><img src="https://img.shields.io/badge/Docling-000000?style=flat" alt="Docling"></a>
|
||||||
|
|
||||||
<a href="https://github.com/vercel/next.js"><img src="https://img.shields.io/badge/Next.js-000000?style=flat&logo=next.js&logoColor=white" alt="Next.js"></a>
|
|
||||||
|
|
||||||
<a href="https://deepwiki.com/phact/openrag"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with Starlette, Next.js, OpenSearch, and Langflow integration.
|
OpenRAG is a comprehensive Retrieval-Augmented Generation platform that enables intelligent document search and AI-powered conversations. Users can upload, process, and query documents through a chat interface backed by large language models and semantic search capabilities. The system utilizes Langflow for document ingestion, retrieval workflows, and intelligent nudges, providing a seamless RAG experience. Built with [Starlette](https://github.com/Kludex/starlette) and [Next.js](https://github.com/vercel/next.js). Powered by [OpenSearch](https://github.com/opensearch-project/OpenSearch), [Langflow](https://github.com/langflow-ai/langflow), and [Docling](https://github.com/docling-project/docling).
|
||||||
|
|
||||||
|
<a href="https://deepwiki.com/phact/openrag"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
<div align="center">
|
<div align="center">
|
||||||
|
|
@ -48,7 +47,7 @@ To launch OpenRAG with the TUI, do the following:
|
||||||
|
|
||||||
The TUI opens and guides you through OpenRAG setup.
|
The TUI opens and guides you through OpenRAG setup.
|
||||||
|
|
||||||
For the full TUI guide, see [TUI](docs/docs/get-started/tui.mdx).
|
For the full TUI guide, see [TUI](https://docs.openr.ag/get-started/tui).
|
||||||
|
|
||||||
## Docker Deployment
|
## Docker Deployment
|
||||||
|
|
||||||
|
|
@ -114,7 +113,7 @@ To deploy OpenRAG with Docker:
|
||||||
| OpenSearch | http://localhost:9200 | Vector database for document storage. |
|
| OpenSearch | http://localhost:9200 | Vector database for document storage. |
|
||||||
| OpenSearch Dashboards | http://localhost:5601 | Database administration interface. |
|
| OpenSearch Dashboards | http://localhost:5601 | Database administration interface. |
|
||||||
|
|
||||||
6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](docs/docs/get-started/quickstart.mdx).
|
6. Access the OpenRAG application at `http://localhost:3000` and continue with the [Quickstart](https://docs.openr.ag/quickstart).
|
||||||
|
|
||||||
To stop `docling serve`, run:
|
To stop `docling serve`, run:
|
||||||
|
|
||||||
|
|
@ -122,11 +121,11 @@ To deploy OpenRAG with Docker:
|
||||||
uv run python scripts/docling_ctl.py stop
|
uv run python scripts/docling_ctl.py stop
|
||||||
```
|
```
|
||||||
|
|
||||||
For more information, see [Deploy with Docker](docs/docs/get-started/docker.mdx).
|
For more information, see [Deploy with Docker](https://docs.openr.ag/get-started/docker).
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
For common issues and fixes, see [Troubleshoot](docs/docs/support/troubleshoot.mdx).
|
For common issues and fixes, see [Troubleshoot](https://docs.openr.ag/support/troubleshoot).
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -86,13 +86,26 @@ function DoclingSetupDialog({
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function DoclingHealthBanner({ className }: DoclingHealthBannerProps) {
|
// Custom hook to check docling health status
|
||||||
|
export function useDoclingHealth() {
|
||||||
const { data: health, isLoading, isError } = useDoclingHealthQuery();
|
const { data: health, isLoading, isError } = useDoclingHealthQuery();
|
||||||
const [showDialog, setShowDialog] = useState(false);
|
|
||||||
|
|
||||||
const isHealthy = health?.status === "healthy" && !isError;
|
const isHealthy = health?.status === "healthy" && !isError;
|
||||||
const isUnhealthy = health?.status === "unhealthy" || isError;
|
const isUnhealthy = health?.status === "unhealthy" || isError;
|
||||||
|
|
||||||
|
return {
|
||||||
|
health,
|
||||||
|
isLoading,
|
||||||
|
isError,
|
||||||
|
isHealthy,
|
||||||
|
isUnhealthy,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export function DoclingHealthBanner({ className }: DoclingHealthBannerProps) {
|
||||||
|
const { isLoading, isHealthy, isUnhealthy } = useDoclingHealth();
|
||||||
|
const [showDialog, setShowDialog] = useState(false);
|
||||||
|
|
||||||
// Only show banner when service is unhealthy
|
// Only show banner when service is unhealthy
|
||||||
if (isLoading || isHealthy) {
|
if (isLoading || isHealthy) {
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import {
|
||||||
type OnboardingVariables,
|
type OnboardingVariables,
|
||||||
useOnboardingMutation,
|
useOnboardingMutation,
|
||||||
} from "@/app/api/mutations/useOnboardingMutation";
|
} from "@/app/api/mutations/useOnboardingMutation";
|
||||||
|
import { DoclingHealthBanner, useDoclingHealth } from "@/components/docling-health-banner";
|
||||||
import IBMLogo from "@/components/logo/ibm-logo";
|
import IBMLogo from "@/components/logo/ibm-logo";
|
||||||
import OllamaLogo from "@/components/logo/ollama-logo";
|
import OllamaLogo from "@/components/logo/ollama-logo";
|
||||||
import OpenAILogo from "@/components/logo/openai-logo";
|
import OpenAILogo from "@/components/logo/openai-logo";
|
||||||
|
|
@ -34,6 +35,7 @@ import { OpenAIOnboarding } from "./components/openai-onboarding";
|
||||||
function OnboardingPage() {
|
function OnboardingPage() {
|
||||||
const { data: settingsDb, isLoading: isSettingsLoading } =
|
const { data: settingsDb, isLoading: isSettingsLoading } =
|
||||||
useGetSettingsQuery();
|
useGetSettingsQuery();
|
||||||
|
const { isHealthy: isDoclingHealthy } = useDoclingHealth();
|
||||||
|
|
||||||
const redirect = "/";
|
const redirect = "/";
|
||||||
|
|
||||||
|
|
@ -114,7 +116,7 @@ function OnboardingPage() {
|
||||||
onboardingMutation.mutate(onboardingData);
|
onboardingMutation.mutate(onboardingData);
|
||||||
};
|
};
|
||||||
|
|
||||||
const isComplete = !!settings.llm_model && !!settings.embedding_model;
|
const isComplete = !!settings.llm_model && !!settings.embedding_model && isDoclingHealthy;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="min-h-dvh w-full flex gap-5 flex-col items-center justify-center bg-background relative p-4">
|
<div className="min-h-dvh w-full flex gap-5 flex-col items-center justify-center bg-background relative p-4">
|
||||||
|
|
@ -130,6 +132,8 @@ function OnboardingPage() {
|
||||||
)}
|
)}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
<DoclingHealthBanner className="absolute top-0 left-0 right-0 w-full z-20" />
|
||||||
|
|
||||||
<div className="flex flex-col items-center gap-5 min-h-[550px] w-full z-10">
|
<div className="flex flex-col items-center gap-5 min-h-[550px] w-full z-10">
|
||||||
<div className="flex flex-col items-center justify-center gap-4">
|
<div className="flex flex-col items-center justify-center gap-4">
|
||||||
<h1 className="text-2xl font-medium font-chivo">
|
<h1 className="text-2xl font-medium font-chivo">
|
||||||
|
|
@ -197,7 +201,9 @@ function OnboardingPage() {
|
||||||
</TooltipTrigger>
|
</TooltipTrigger>
|
||||||
{!isComplete && (
|
{!isComplete && (
|
||||||
<TooltipContent>
|
<TooltipContent>
|
||||||
Please fill in all required fields
|
{!!settings.llm_model && !!settings.embedding_model && !isDoclingHealthy
|
||||||
|
? "docling-serve must be running to continue"
|
||||||
|
: "Please fill in all required fields"}
|
||||||
</TooltipContent>
|
</TooltipContent>
|
||||||
)}
|
)}
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
|
|
|
||||||
|
|
@ -536,7 +536,142 @@ async def onboarding(request, flows_service):
|
||||||
{"error": "No valid fields provided for update"}, status_code=400
|
{"error": "No valid fields provided for update"}, status_code=400
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Initialize the OpenSearch index now that we have the embedding model configured
|
||||||
|
try:
|
||||||
|
# Import here to avoid circular imports
|
||||||
|
from main import init_index
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Initializing OpenSearch index after onboarding configuration"
|
||||||
|
)
|
||||||
|
await init_index()
|
||||||
|
logger.info("OpenSearch index initialization completed successfully")
|
||||||
|
except Exception as e:
|
||||||
|
if isinstance(e, ValueError):
|
||||||
|
logger.error(
|
||||||
|
"Failed to initialize OpenSearch index after onboarding",
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
return JSONResponse(
|
||||||
|
{
|
||||||
|
"error": str(e),
|
||||||
|
"edited": True,
|
||||||
|
},
|
||||||
|
status_code=400,
|
||||||
|
)
|
||||||
|
logger.error(
|
||||||
|
"Failed to initialize OpenSearch index after onboarding",
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
# Don't fail the entire onboarding process if index creation fails
|
||||||
|
# The application can still work, but document operations may fail
|
||||||
|
|
||||||
# Save the updated configuration (this will mark it as edited)
|
# Save the updated configuration (this will mark it as edited)
|
||||||
|
|
||||||
|
# If model_provider was updated, assign the new provider to flows
|
||||||
|
if "model_provider" in body:
|
||||||
|
provider = body["model_provider"].strip().lower()
|
||||||
|
try:
|
||||||
|
flow_result = await flows_service.assign_model_provider(provider)
|
||||||
|
|
||||||
|
if flow_result.get("success"):
|
||||||
|
logger.info(
|
||||||
|
f"Successfully assigned {provider} to flows",
|
||||||
|
flow_result=flow_result,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"Failed to assign {provider} to flows",
|
||||||
|
flow_result=flow_result,
|
||||||
|
)
|
||||||
|
# Continue even if flow assignment fails - configuration was still saved
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
"Error assigning model provider to flows",
|
||||||
|
provider=provider,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
# Set Langflow global variables based on provider
|
||||||
|
try:
|
||||||
|
# Set API key for IBM/Watson providers
|
||||||
|
if (provider == "watsonx") and "api_key" in body:
|
||||||
|
api_key = body["api_key"]
|
||||||
|
await clients._create_langflow_global_variable(
|
||||||
|
"WATSONX_API_KEY", api_key, modify=True
|
||||||
|
)
|
||||||
|
logger.info("Set WATSONX_API_KEY global variable in Langflow")
|
||||||
|
|
||||||
|
# Set project ID for IBM/Watson providers
|
||||||
|
if (provider == "watsonx") and "project_id" in body:
|
||||||
|
project_id = body["project_id"]
|
||||||
|
await clients._create_langflow_global_variable(
|
||||||
|
"WATSONX_PROJECT_ID", project_id, modify=True
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"Set WATSONX_PROJECT_ID global variable in Langflow"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set API key for OpenAI provider
|
||||||
|
if provider == "openai" and "api_key" in body:
|
||||||
|
api_key = body["api_key"]
|
||||||
|
await clients._create_langflow_global_variable(
|
||||||
|
"OPENAI_API_KEY", api_key, modify=True
|
||||||
|
)
|
||||||
|
logger.info("Set OPENAI_API_KEY global variable in Langflow")
|
||||||
|
|
||||||
|
# Set base URL for Ollama provider
|
||||||
|
if provider == "ollama" and "endpoint" in body:
|
||||||
|
endpoint = transform_localhost_url(body["endpoint"])
|
||||||
|
|
||||||
|
await clients._create_langflow_global_variable(
|
||||||
|
"OLLAMA_BASE_URL", endpoint, modify=True
|
||||||
|
)
|
||||||
|
logger.info("Set OLLAMA_BASE_URL global variable in Langflow")
|
||||||
|
|
||||||
|
await flows_service.change_langflow_model_value(
|
||||||
|
provider,
|
||||||
|
body["embedding_model"],
|
||||||
|
body["llm_model"],
|
||||||
|
body["endpoint"],
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
"Failed to set Langflow global variables",
|
||||||
|
provider=provider,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
|
# Handle sample data ingestion if requested
|
||||||
|
if should_ingest_sample_data:
|
||||||
|
try:
|
||||||
|
# Import the function here to avoid circular imports
|
||||||
|
from main import ingest_default_documents_when_ready
|
||||||
|
|
||||||
|
# Get services from the current app state
|
||||||
|
# We need to access the app instance to get services
|
||||||
|
app = request.scope.get("app")
|
||||||
|
if app and hasattr(app.state, "services"):
|
||||||
|
services = app.state.services
|
||||||
|
logger.info(
|
||||||
|
"Starting sample data ingestion as requested in onboarding"
|
||||||
|
)
|
||||||
|
await ingest_default_documents_when_ready(services)
|
||||||
|
logger.info("Sample data ingestion completed successfully")
|
||||||
|
else:
|
||||||
|
logger.error(
|
||||||
|
"Could not access services for sample data ingestion"
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
"Failed to complete sample data ingestion", error=str(e)
|
||||||
|
)
|
||||||
|
# Don't fail the entire onboarding process if sample data fails
|
||||||
if config_manager.save_config_file(current_config):
|
if config_manager.save_config_file(current_config):
|
||||||
updated_fields = [
|
updated_fields = [
|
||||||
k for k in body.keys() if k != "sample_data"
|
k for k in body.keys() if k != "sample_data"
|
||||||
|
|
@ -546,144 +681,19 @@ async def onboarding(request, flows_service):
|
||||||
updated_fields=updated_fields,
|
updated_fields=updated_fields,
|
||||||
)
|
)
|
||||||
|
|
||||||
# If model_provider was updated, assign the new provider to flows
|
|
||||||
if "model_provider" in body:
|
|
||||||
provider = body["model_provider"].strip().lower()
|
|
||||||
try:
|
|
||||||
flow_result = await flows_service.assign_model_provider(provider)
|
|
||||||
|
|
||||||
if flow_result.get("success"):
|
|
||||||
logger.info(
|
|
||||||
f"Successfully assigned {provider} to flows",
|
|
||||||
flow_result=flow_result,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
f"Failed to assign {provider} to flows",
|
|
||||||
flow_result=flow_result,
|
|
||||||
)
|
|
||||||
# Continue even if flow assignment fails - configuration was still saved
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(
|
|
||||||
"Error assigning model provider to flows",
|
|
||||||
provider=provider,
|
|
||||||
error=str(e),
|
|
||||||
)
|
|
||||||
# Continue even if flow assignment fails - configuration was still saved
|
|
||||||
|
|
||||||
# Set Langflow global variables based on provider
|
|
||||||
if "model_provider" in body:
|
|
||||||
provider = body["model_provider"].strip().lower()
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Set API key for IBM/Watson providers
|
|
||||||
if (provider == "watsonx") and "api_key" in body:
|
|
||||||
api_key = body["api_key"]
|
|
||||||
await clients._create_langflow_global_variable(
|
|
||||||
"WATSONX_API_KEY", api_key, modify=True
|
|
||||||
)
|
|
||||||
logger.info("Set WATSONX_API_KEY global variable in Langflow")
|
|
||||||
|
|
||||||
# Set project ID for IBM/Watson providers
|
|
||||||
if (provider == "watsonx") and "project_id" in body:
|
|
||||||
project_id = body["project_id"]
|
|
||||||
await clients._create_langflow_global_variable(
|
|
||||||
"WATSONX_PROJECT_ID", project_id, modify=True
|
|
||||||
)
|
|
||||||
logger.info(
|
|
||||||
"Set WATSONX_PROJECT_ID global variable in Langflow"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Set API key for OpenAI provider
|
|
||||||
if provider == "openai" and "api_key" in body:
|
|
||||||
api_key = body["api_key"]
|
|
||||||
await clients._create_langflow_global_variable(
|
|
||||||
"OPENAI_API_KEY", api_key, modify=True
|
|
||||||
)
|
|
||||||
logger.info("Set OPENAI_API_KEY global variable in Langflow")
|
|
||||||
|
|
||||||
# Set base URL for Ollama provider
|
|
||||||
if provider == "ollama" and "endpoint" in body:
|
|
||||||
endpoint = transform_localhost_url(body["endpoint"])
|
|
||||||
|
|
||||||
await clients._create_langflow_global_variable(
|
|
||||||
"OLLAMA_BASE_URL", endpoint, modify=True
|
|
||||||
)
|
|
||||||
logger.info("Set OLLAMA_BASE_URL global variable in Langflow")
|
|
||||||
|
|
||||||
await flows_service.change_langflow_model_value(
|
|
||||||
provider,
|
|
||||||
body["embedding_model"],
|
|
||||||
body["llm_model"],
|
|
||||||
body["endpoint"],
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(
|
|
||||||
"Failed to set Langflow global variables",
|
|
||||||
provider=provider,
|
|
||||||
error=str(e),
|
|
||||||
)
|
|
||||||
# Continue even if setting global variables fails
|
|
||||||
|
|
||||||
# Initialize the OpenSearch index now that we have the embedding model configured
|
|
||||||
try:
|
|
||||||
# Import here to avoid circular imports
|
|
||||||
from main import init_index
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"Initializing OpenSearch index after onboarding configuration"
|
|
||||||
)
|
|
||||||
await init_index()
|
|
||||||
logger.info("OpenSearch index initialization completed successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(
|
|
||||||
"Failed to initialize OpenSearch index after onboarding",
|
|
||||||
error=str(e),
|
|
||||||
)
|
|
||||||
# Don't fail the entire onboarding process if index creation fails
|
|
||||||
# The application can still work, but document operations may fail
|
|
||||||
|
|
||||||
# Handle sample data ingestion if requested
|
|
||||||
if should_ingest_sample_data:
|
|
||||||
try:
|
|
||||||
# Import the function here to avoid circular imports
|
|
||||||
from main import ingest_default_documents_when_ready
|
|
||||||
|
|
||||||
# Get services from the current app state
|
|
||||||
# We need to access the app instance to get services
|
|
||||||
app = request.scope.get("app")
|
|
||||||
if app and hasattr(app.state, "services"):
|
|
||||||
services = app.state.services
|
|
||||||
logger.info(
|
|
||||||
"Starting sample data ingestion as requested in onboarding"
|
|
||||||
)
|
|
||||||
await ingest_default_documents_when_ready(services)
|
|
||||||
logger.info("Sample data ingestion completed successfully")
|
|
||||||
else:
|
|
||||||
logger.error(
|
|
||||||
"Could not access services for sample data ingestion"
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(
|
|
||||||
"Failed to complete sample data ingestion", error=str(e)
|
|
||||||
)
|
|
||||||
# Don't fail the entire onboarding process if sample data fails
|
|
||||||
|
|
||||||
return JSONResponse(
|
|
||||||
{
|
|
||||||
"message": "Onboarding configuration updated successfully",
|
|
||||||
"edited": True, # Confirm that config is now marked as edited
|
|
||||||
"sample_data_ingested": should_ingest_sample_data,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
{"error": "Failed to save configuration"}, status_code=500
|
{"error": "Failed to save configuration"}, status_code=500
|
||||||
)
|
)
|
||||||
|
|
||||||
|
return JSONResponse(
|
||||||
|
{
|
||||||
|
"message": "Onboarding configuration updated successfully",
|
||||||
|
"edited": True, # Confirm that config is now marked as edited
|
||||||
|
"sample_data_ingested": should_ingest_sample_data,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Failed to update onboarding settings", error=str(e))
|
logger.error("Failed to update onboarding settings", error=str(e))
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
|
|
|
||||||
|
|
@ -81,12 +81,6 @@ OPENAI_EMBEDDING_DIMENSIONS = {
|
||||||
"text-embedding-ada-002": 1536,
|
"text-embedding-ada-002": 1536,
|
||||||
}
|
}
|
||||||
|
|
||||||
OLLAMA_EMBEDDING_DIMENSIONS = {
|
|
||||||
"nomic-embed-text": 768,
|
|
||||||
"all-minilm": 384,
|
|
||||||
"mxbai-embed-large": 1024,
|
|
||||||
}
|
|
||||||
|
|
||||||
WATSONX_EMBEDDING_DIMENSIONS = {
|
WATSONX_EMBEDDING_DIMENSIONS = {
|
||||||
# IBM Models
|
# IBM Models
|
||||||
"ibm/granite-embedding-107m-multilingual": 384,
|
"ibm/granite-embedding-107m-multilingual": 384,
|
||||||
|
|
|
||||||
|
|
@ -168,7 +168,12 @@ async def init_index():
|
||||||
embedding_model = config.knowledge.embedding_model
|
embedding_model = config.knowledge.embedding_model
|
||||||
|
|
||||||
# Create dynamic index body based on the configured embedding model
|
# Create dynamic index body based on the configured embedding model
|
||||||
dynamic_index_body = create_dynamic_index_body(embedding_model)
|
# Pass provider and endpoint for dynamic dimension resolution (Ollama probing)
|
||||||
|
dynamic_index_body = await create_dynamic_index_body(
|
||||||
|
embedding_model,
|
||||||
|
provider=config.provider.model_provider,
|
||||||
|
endpoint=config.provider.endpoint
|
||||||
|
)
|
||||||
|
|
||||||
# Create documents index
|
# Create documents index
|
||||||
if not await clients.opensearch.indices.exists(index=INDEX_NAME):
|
if not await clients.opensearch.indices.exists(index=INDEX_NAME):
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,128 @@
|
||||||
from config.settings import OLLAMA_EMBEDDING_DIMENSIONS, OPENAI_EMBEDDING_DIMENSIONS, VECTOR_DIM, WATSONX_EMBEDDING_DIMENSIONS
|
import httpx
|
||||||
|
from config.settings import OPENAI_EMBEDDING_DIMENSIONS, VECTOR_DIM, WATSONX_EMBEDDING_DIMENSIONS
|
||||||
|
from utils.container_utils import transform_localhost_url
|
||||||
from utils.logging_config import get_logger
|
from utils.logging_config import get_logger
|
||||||
|
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
def get_embedding_dimensions(model_name: str) -> int:
|
|
||||||
|
def _embedding_dimension_from_response(data) -> int:
    """Return the length of the embedding vector in a parsed response, or 0.

    The ``"embedding"`` key is checked first; the first entry of
    ``"embeddings"`` is only consulted when ``"embedding"`` is absent.
    A return value of 0 means no non-empty vector was found.
    """
    if "embedding" in data:
        return len(data["embedding"])
    if "embeddings" in data and len(data["embeddings"]) > 0:
        return len(data["embeddings"][0])
    return 0


async def _probe_ollama_embedding_dimension(endpoint: str, model_name: str) -> int:
    """Probe Ollama server to get embedding dimension for a model.

    Two requests are attempted against ``<endpoint>/api/embeddings``: first a
    payload carrying both ``input`` and ``prompt``, then a payload carrying
    only ``prompt``. The first response containing a non-empty embedding
    vector determines the result.

    Args:
        endpoint: Ollama server endpoint (e.g., "http://localhost:11434")
        model_name: Name of the embedding model

    Returns:
        The embedding dimension.

    Raises:
        ValueError: If the dimension cannot be determined.
    """
    transformed_endpoint = transform_localhost_url(endpoint)
    url = f"{transformed_endpoint}/api/embeddings"
    test_input = "test"
    # Generic connection-failure text; a more specific error from either
    # attempt is preferred over it when building the final message.
    connection_error = "All connection attempts failed"
    errors: list[str] = []

    async with httpx.AsyncClient() as client:
        # Try modern API format first (input parameter)
        modern_payload = {
            "model": model_name,
            "input": test_input,
            "prompt": test_input,
        }

        try:
            response = await client.post(url, json=modern_payload, timeout=10.0)
            response.raise_for_status()
            dimension = _embedding_dimension_from_response(response.json())
            if dimension > 0:
                logger.info(
                    f"Probed Ollama model '{model_name}': dimension={dimension}"
                )
                return dimension

            errors.append("response did not include non-zero embedding vector")
        except Exception as modern_error:  # noqa: BLE001 - log and fall back to legacy payload
            logger.debug(
                "Modern Ollama embeddings API probe failed",
                model=model_name,
                endpoint=transformed_endpoint,
                error=str(modern_error),
            )
            errors.append(str(modern_error))

        # Try legacy API format (prompt parameter)
        legacy_payload = {
            "model": model_name,
            "prompt": test_input,
        }

        try:
            response = await client.post(url, json=legacy_payload, timeout=10.0)
            response.raise_for_status()
            dimension = _embedding_dimension_from_response(response.json())
            if dimension > 0:
                logger.info(
                    f"Probed Ollama model '{model_name}' (legacy): dimension={dimension}"
                )
                return dimension

            errors.append("legacy response did not include non-zero embedding vector")
        except Exception as legacy_error:  # noqa: BLE001 - collect and raise a helpful error later
            logger.warning(
                "Legacy Ollama embeddings API probe failed",
                model=model_name,
                endpoint=transformed_endpoint,
                error=str(legacy_error),
            )
            errors.append(str(legacy_error))

    # Report the first specific error from either attempt; fall back to the
    # generic connection failure only when nothing more specific was seen.
    # (An unconditional errors.remove(...) would itself raise an unhelpful
    # "list.remove(x): x not in list" ValueError when the generic message
    # is absent, masking the real cause.)
    detail = next(
        (message for message in errors if message != connection_error),
        connection_error,
    )

    raise ValueError(
        f"Failed to determine embedding dimensions for Ollama model '{model_name}'. "
        f"Verify the Ollama server at '{endpoint}' is reachable and the model is available. "
        f"Error: {detail}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def get_embedding_dimensions(model_name: str, provider: str = None, endpoint: str = None) -> int:
|
||||||
"""Get the embedding dimensions for a given model name."""
|
"""Get the embedding dimensions for a given model name."""
|
||||||
|
|
||||||
|
if provider and provider.lower() == "ollama":
|
||||||
|
if not endpoint:
|
||||||
|
raise ValueError(
|
||||||
|
"Ollama endpoint is required to determine embedding dimensions. Please provide a valid endpoint."
|
||||||
|
)
|
||||||
|
return await _probe_ollama_embedding_dimension(endpoint, model_name)
|
||||||
|
|
||||||
# Check all model dictionaries
|
# Check all model dictionaries
|
||||||
all_models = {**OPENAI_EMBEDDING_DIMENSIONS, **OLLAMA_EMBEDDING_DIMENSIONS, **WATSONX_EMBEDDING_DIMENSIONS}
|
all_models = {**OPENAI_EMBEDDING_DIMENSIONS, **WATSONX_EMBEDDING_DIMENSIONS}
|
||||||
|
|
||||||
model_name = model_name.lower().strip().split(":")[0]
|
model_name = model_name.lower().strip().split(":")[0]
|
||||||
|
|
||||||
|
|
@ -23,9 +137,22 @@ def get_embedding_dimensions(model_name: str) -> int:
|
||||||
return VECTOR_DIM
|
return VECTOR_DIM
|
||||||
|
|
||||||
|
|
||||||
def create_dynamic_index_body(embedding_model: str) -> dict:
|
async def create_dynamic_index_body(
|
||||||
"""Create a dynamic index body configuration based on the embedding model."""
|
embedding_model: str,
|
||||||
dimensions = get_embedding_dimensions(embedding_model)
|
provider: str = None,
|
||||||
|
endpoint: str = None
|
||||||
|
) -> dict:
|
||||||
|
"""Create a dynamic index body configuration based on the embedding model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
embedding_model: Name of the embedding model
|
||||||
|
provider: Provider name (e.g., "ollama", "openai", "watsonx")
|
||||||
|
endpoint: Endpoint URL for the provider (used for Ollama probing)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
OpenSearch index body configuration
|
||||||
|
"""
|
||||||
|
dimensions = await get_embedding_dimensions(embedding_model, provider, endpoint)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"settings": {
|
"settings": {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue