Adds initial LightRAG app integration with schema and processors

Introduces the LightRAG Retrieval-Augmented Generation framework as an Apolo app, including input/output schemas, types, and processors.
Adds Helm chart value processing, environment and persistence configurations, and output service discovery for deployment.
Includes scripts for generating type schemas and testing support, along with CI and linting setup tailored for the new app.
Provides a documentation loader script to ingest markdown files into LightRAG with flexible referencing modes.

Relates to MLO-469
This commit is contained in:
Taddeus 2025-11-03 13:59:52 +02:00
parent 748ded40fb
commit 2255b91210
27 changed files with 5166 additions and 66 deletions

0
.apolo/__init__.py Normal file
View file

55
.apolo/applications.yaml Normal file
View file

@ -0,0 +1,55 @@
- app_type: lightrag
name: lightrag
title: LightRAG
install_type: workflow
helm_path: k8s-deploy/lightrag
app_package_name: apolo_apps_lightrag
inputs:
schema_path: .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppInputs.json
types_name: LightRAGAppInputs
processor: LightRAGInputsProcessor
image: ghcr.io/neuro-inc/app-lightrag
outputs:
schema_path: .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppOutputs.json
types_name: LightRAGAppOutputs
processor: LightRAGOutputsProcessor
image: ghcr.io/neuro-inc/app-lightrag
short_description: Advanced RAG framework with graph-enhanced retrieval capabilities
description: |
LightRAG is a simple and fast Retrieval-Augmented Generation (RAG) system that incorporates
graph structure into text indexing and retrieval processes. Unlike traditional RAG approaches,
LightRAG utilizes both low-level and high-level knowledge discovery to enhance text retrieval.
It supports both naive and local search methods, making it suitable for comprehensive
question-answering tasks. The system includes a user-friendly web interface for document
management and querying, with built-in support for various LLM providers and embedding models.
pub_date: "2025-06-25T00:00:00+00:00"
logo: https://storage.googleapis.com/development-421920-assets/app-logos/lightrag-logo.png
tags:
- "RAG"
- "LightRAG"
- "Knowledge Graph"
- "Vector Search"
- "Document Processing"
- "LLM"
- "Embeddings"
- "PostgreSQL"
- "Graph"
- "AI"
- "NLP"
assets:
- type: image
url: https://storage.googleapis.com/development-421920-assets/app-logos/lightrag-banner.png
- type: video
url: https://www.youtube.com/watch?v=oageL-1I0GE
- type: pdf
url: https://arxiv.org/abs/2410.05779
urls:
- name: LightRAG GitHub Repository
type: documentation
url: https://github.com/HKUDS/LightRAG
- name: LightRAG DeepWiki Documentation
type: documentation
url: https://deepwiki.com/HKUDS/LightRAG
- name: LightRAG Official Repository
type: external
url: https://github.com/HKUDS/LightRAG

1
.apolo/project.yaml Normal file
View file

@ -0,0 +1 @@
id: lightrag

View file

@ -0,0 +1,3 @@
# Regenerate the JSON schemas for the app input/output pydantic types.
.PHONY: gen-types-schemas
gen-types-schemas:
	@.apolo/scripts/gen_types_schemas.sh

View file

@ -0,0 +1,26 @@
#!/usr/bin/env bash
# Regenerate the JSON schemas for the LightRAG app input/output types
# using the `app-types` CLI.
set -euo pipefail

# Repository root resolved relative to this script's own location.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
APP_PACKAGE_DIR=".apolo/src/apolo_apps_lightrag"
INPUT_SCHEMA="${APP_PACKAGE_DIR}/schemas/LightRAGAppInputs.json"
OUTPUT_SCHEMA="${APP_PACKAGE_DIR}/schemas/LightRAGAppOutputs.json"

# Locate the app-types CLI: prefer poetry, then the repo-local venv, then PATH.
if command -v poetry >/dev/null 2>&1; then
    APP_TYPES_CMD=(poetry run app-types)
elif [[ -x "${REPO_ROOT}/.venv/bin/app-types" ]]; then
    APP_TYPES_CMD=("${REPO_ROOT}/.venv/bin/app-types")
elif command -v app-types >/dev/null 2>&1; then
    APP_TYPES_CMD=(app-types)
else
    echo "app-types CLI not found. Install dependencies via 'poetry install --with dev'." >&2
    exit 1
fi

# Run from the repo root in a subshell so the caller's CWD is untouched.
(
    cd "${REPO_ROOT}"
    "${APP_TYPES_CMD[@]}" dump-types-schema "${APP_PACKAGE_DIR}" LightRAGAppInputs "${INPUT_SCHEMA}"
    "${APP_TYPES_CMD[@]}" dump-types-schema "${APP_PACKAGE_DIR}" LightRAGAppOutputs "${OUTPUT_SCHEMA}"
)

View file

@ -0,0 +1,15 @@
"""Public surface of the LightRAG Apolo app package.

Re-exports the input/output chart-value processors and the pydantic
input/output models so they can be imported from the package root.
"""

from apolo_apps_lightrag.inputs_processor import (
    LightRAGInputsProcessor,
)
from apolo_apps_lightrag.outputs_processor import (
    LightRAGOutputsProcessor,
)
from apolo_apps_lightrag.types import LightRAGAppInputs, LightRAGAppOutputs

__all__ = [
    "LightRAGInputsProcessor",
    "LightRAGOutputsProcessor",
    "LightRAGAppInputs",
    "LightRAGAppOutputs",
]

View file

@ -0,0 +1,246 @@
import logging
import typing as t
from apolo_app_types.app_types import AppType
from apolo_app_types.helm.apps.base import BaseChartValueProcessor
from apolo_app_types.helm.apps.common import gen_extra_values
from apolo_app_types.helm.utils.deep_merging import merge_list_of_dicts
from apolo_app_types.protocols.common.openai_compat import (
OpenAICompatChatAPI,
OpenAICompatEmbeddingsAPI,
)
from apolo_app_types.protocols.common.secrets_ import serialize_optional_secret
from .types import (
AnthropicLLMProvider,
GeminiLLMProvider,
LightRAGAppInputs,
OllamaEmbeddingProvider,
OllamaLLMProvider,
OpenAIEmbeddingProvider,
OpenAILLMProvider,
)
logger = logging.getLogger(__name__)
class LightRAGInputsProcessor(BaseChartValueProcessor[LightRAGAppInputs]):
    """Translate ``LightRAGAppInputs`` into Helm chart values.

    Normalizes the provider-specific LLM/embedding configs into a flat
    ``{binding, model, host, api_key[, dimensions]}`` shape, then assembles
    environment, persistence, platform, and base chart values.
    """

    def _extract_llm_config(self, llm_config: t.Any) -> dict[str, t.Any]:
        """Extract LLM configuration from provider-specific config."""
        if isinstance(llm_config, OpenAICompatChatAPI):
            # OpenAI-compatible endpoints carry the model via hf_model.
            if not llm_config.hf_model:
                msg = "OpenAI compatible chat API must have hf_model configured"
                raise ValueError(msg)
            model = llm_config.hf_model.model_hf_name
            host = llm_config.complete_url
            return {
                "binding": "openai",
                "model": model,
                "host": host,
                "api_key": getattr(llm_config, "api_key", None),
            }
        if isinstance(llm_config, OpenAILLMProvider):
            host = llm_config.complete_url
            return {
                "binding": "openai",
                "model": llm_config.model,
                "host": host,
                "api_key": llm_config.api_key,
            }
        if isinstance(llm_config, AnthropicLLMProvider):
            host = llm_config.complete_url
            return {
                "binding": "anthropic",
                "model": llm_config.model,
                "host": host,
                "api_key": llm_config.api_key,
            }
        if isinstance(llm_config, OllamaLLMProvider):
            host = llm_config.complete_url
            return {
                "binding": "ollama",
                "model": llm_config.model,
                "host": host,
                # Ollama is self-hosted and has no API key field.
                "api_key": None,
            }
        if isinstance(llm_config, GeminiLLMProvider):
            host = llm_config.complete_url
            return {
                "binding": "gemini",
                "model": llm_config.model,
                "host": host,
                "api_key": llm_config.api_key,
            }
        # Fallback for unrecognized config objects: probe common attributes
        # and default to an OpenAI-style binding.
        binding = getattr(llm_config, "provider", "openai")
        model = getattr(llm_config, "model", "gpt-4o-mini")
        api_key = getattr(llm_config, "api_key", None)
        host = ""
        if hasattr(llm_config, "complete_url"):
            host = llm_config.complete_url
        elif hasattr(llm_config, "host") and llm_config.host:
            # Rebuild a URL from host/protocol/port parts when no
            # complete_url is available.
            protocol = getattr(llm_config, "protocol", "https")
            port = getattr(llm_config, "port", 443)
            host = f"{protocol}://{llm_config.host}:{port}"
        return {"binding": binding, "model": model, "host": host, "api_key": api_key}

    def _extract_embedding_config(self, embedding_config: t.Any) -> dict[str, t.Any]:
        """Extract embedding configuration from provider-specific config."""
        if isinstance(embedding_config, OpenAICompatEmbeddingsAPI):
            if embedding_config.hf_model is None:
                msg = "OpenAI compatible embeddings API must have hf_model configured"
                raise ValueError(msg)
            model = embedding_config.hf_model.model_hf_name
            host = embedding_config.complete_url
            return {
                "binding": "openai",
                "model": model,
                "api_key": getattr(embedding_config, "api_key", None),
                # NOTE(review): dimensions are hard-coded per binding here
                # (1536 for OpenAI-style, 1024 for Ollama) — confirm they
                # match the actual embedding model in use.
                "dimensions": 1536,
                "host": host,
            }
        if isinstance(embedding_config, OpenAIEmbeddingProvider):
            host = embedding_config.complete_url
            return {
                "binding": "openai",
                "model": embedding_config.model,
                "api_key": embedding_config.api_key,
                "dimensions": 1536,
                "host": host,
            }
        if isinstance(embedding_config, OllamaEmbeddingProvider):
            host = embedding_config.complete_url
            return {
                "binding": "ollama",
                "model": embedding_config.model,
                "api_key": None,
                "dimensions": 1024,
                "host": host,
            }
        # Fallback for unrecognized config objects, mirroring
        # _extract_llm_config's attribute probing.
        binding = getattr(embedding_config, "provider", "openai")
        model = getattr(embedding_config, "model", "text-embedding-ada-002")
        api_key = getattr(embedding_config, "api_key", None)
        dimensions = 1536
        if hasattr(embedding_config, "dimensions"):
            dimensions = embedding_config.dimensions
        host = ""
        if hasattr(embedding_config, "complete_url"):
            host = embedding_config.complete_url
        elif hasattr(embedding_config, "host") and embedding_config.host:
            protocol = getattr(embedding_config, "protocol", "https")
            port = getattr(embedding_config, "port", 443)
            host = f"{protocol}://{embedding_config.host}:{port}"
        return {
            "binding": binding,
            "model": model,
            "api_key": api_key,
            "dimensions": dimensions,
            "host": host,
        }

    async def _get_environment_values(
        self,
        input_: LightRAGAppInputs,
        app_secrets_name: str,
    ) -> dict[str, t.Any]:
        """Build the ``env`` section of the chart values.

        Covers server settings, LLM/embedding bindings, storage backends,
        and PostgreSQL connection details taken from the pgvector user.
        """
        llm_config = self._extract_llm_config(input_.llm_config)
        embedding_config = self._extract_embedding_config(input_.embedding_config)
        env_config = {
            "HOST": "0.0.0.0",
            "PORT": 9621,
            "WEBUI_TITLE": "Graph RAG Engine",
            "WEBUI_DESCRIPTION": "Simple and Fast Graph Based RAG System",
            "LLM_BINDING": llm_config["binding"],
            "LLM_MODEL": llm_config["model"],
            "LLM_BINDING_HOST": llm_config["host"],
            "LLM_BINDING_API_KEY": serialize_optional_secret(
                llm_config["api_key"], app_secrets_name
            ),
            # Some bindings read OPENAI_API_KEY directly; fall back to "".
            "OPENAI_API_KEY": serialize_optional_secret(
                llm_config["api_key"], app_secrets_name
            )
            or "",
            "EMBEDDING_BINDING": embedding_config["binding"],
            "EMBEDDING_MODEL": embedding_config["model"],
            "EMBEDDING_DIM": embedding_config["dimensions"],
            "EMBEDDING_BINDING_HOST": embedding_config["host"],
            "EMBEDDING_BINDING_API_KEY": serialize_optional_secret(
                embedding_config["api_key"], app_secrets_name
            )
            or "",
            # Postgres-backed KV/vector/doc-status storage; graph stays in
            # NetworkX (file-based) storage.
            "LIGHTRAG_KV_STORAGE": "PGKVStorage",
            "LIGHTRAG_VECTOR_STORAGE": "PGVectorStorage",
            "LIGHTRAG_DOC_STATUS_STORAGE": "PGDocStatusStorage",
            "LIGHTRAG_GRAPH_STORAGE": "NetworkXStorage",
            "POSTGRES_HOST": input_.pgvector_user.pgbouncer_host,
            "POSTGRES_PORT": input_.pgvector_user.pgbouncer_port,
            "POSTGRES_USER": input_.pgvector_user.user,
            "POSTGRES_PASSWORD": input_.pgvector_user.password,
            "POSTGRES_DATABASE": input_.pgvector_user.dbname,
            "POSTGRES_WORKSPACE": "default",
        }
        return {"env": env_config}

    async def _get_persistence_values(
        self,
        input_: LightRAGAppInputs,
    ) -> dict[str, t.Any]:
        """Build the ``persistence`` chart values (sizes rendered as "<n>Gi")."""
        return {
            "persistence": {
                "enabled": True,
                "ragStorage": {
                    "size": f"{input_.persistence.rag_storage_size}Gi",
                },
                "inputs": {
                    "size": f"{input_.persistence.inputs_storage_size}Gi",
                },
            }
        }

    async def gen_extra_values(
        self,
        input_: LightRAGAppInputs,
        app_name: str,
        namespace: str,
        app_id: str,
        app_secrets_name: str,
        *_: t.Any,
        **kwargs: t.Any,
    ) -> dict[str, t.Any]:
        """Assemble the full set of extra Helm values for a LightRAG release.

        Merges (in order) base image/service values, environment values,
        persistence values, and platform-generated values (preset, ingress).
        Extra positional/keyword args are accepted for interface
        compatibility and ignored here.
        """
        env_values = await self._get_environment_values(input_, app_secrets_name)
        persistence_values = await self._get_persistence_values(input_)
        platform_values = await gen_extra_values(
            apolo_client=self.client,
            preset_type=input_.preset,
            ingress_http=input_.ingress_http,
            ingress_grpc=None,
            namespace=namespace,
            app_id=app_id,
            app_type=AppType.LightRAG,
        )
        base_values = {
            "replicaCount": 1,
            "image": {
                "repository": "ghcr.io/hkuds/lightrag",
                "tag": "1.3.8",
                "pullPolicy": "IfNotPresent",
            },
            "service": {
                "type": "ClusterIP",
                "port": 9621,
            },
            "nameOverride": "",
            "fullnameOverride": app_name,
        }
        logger.debug("Generated LightRAG values for app %s", app_name)
        return merge_list_of_dicts(
            [
                base_values,
                env_values,
                persistence_values,
                platform_values,
            ]
        )
__all__ = ["LightRAGInputsProcessor"]

View file

@ -0,0 +1,66 @@
import logging
import typing as t
from apolo_app_types.clients.kube import get_service_host_port
from apolo_app_types.outputs.base import BaseAppOutputsProcessor
from apolo_app_types.outputs.common import (
INSTANCE_LABEL,
get_internal_external_web_urls,
)
from apolo_app_types.outputs.utils.ingress import get_ingress_host_port
from apolo_app_types.protocols.common.networking import HttpApi, ServiceAPI, WebApp
from .types import LightRAGAppOutputs
logger = logging.getLogger(__name__)
async def _generate_lightrag_outputs(
    helm_values: dict[str, t.Any],
    app_instance_id: str,
) -> LightRAGAppOutputs:
    """Discover the deployed LightRAG services and build the app outputs.

    Looks up the web app URLs, the internal service host/port, and the
    ingress host/port by the app's Kubernetes labels.
    """
    match_labels = {
        "app.kubernetes.io/name": "lightrag",
        INSTANCE_LABEL: app_instance_id,
    }

    web_internal, web_external = await get_internal_external_web_urls(match_labels)

    # Internal API endpoint, present only when the service is discoverable.
    svc_host, svc_port = await get_service_host_port(match_labels=match_labels)
    api_internal = (
        HttpApi(host=svc_host, port=int(svc_port), protocol="http")
        if svc_host
        else None
    )

    # External API endpoint, present only when an ingress is configured.
    api_external = None
    ingress = await get_ingress_host_port(match_labels=match_labels)
    if ingress:
        ingress_host, ingress_port = ingress
        api_external = HttpApi(
            host=ingress_host,
            port=int(ingress_port),
            protocol="https",
        )

    return LightRAGAppOutputs(
        app_url=ServiceAPI[WebApp](
            internal_url=web_internal,
            external_url=web_external,
        ),
        server_url=ServiceAPI[HttpApi](
            internal_url=api_internal,
            external_url=api_external,
        ),
    )
class LightRAGOutputsProcessor(BaseAppOutputsProcessor[LightRAGAppOutputs]):
    """Output processor that resolves LightRAG service URLs after deploy."""

    async def _generate_outputs(
        self,
        helm_values: dict[str, t.Any],
        app_instance_id: str,
    ) -> LightRAGAppOutputs:
        """Delegate to the module-level discovery helper and log the result."""
        generated = await _generate_lightrag_outputs(helm_values, app_instance_id)
        logger.info("Got outputs: %s", generated)
        return generated
__all__ = ["LightRAGOutputsProcessor"]

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,245 @@
{
"$defs": {
"HttpApi": {
"properties": {
"host": {
"title": "Host",
"type": "string",
"x-description": "The hostname of the HTTP endpoint.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Hostname"
},
"port": {
"default": 80,
"exclusiveMinimum": 0,
"title": "Port",
"type": "integer",
"x-description": "The port of the HTTP endpoint.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Port"
},
"protocol": {
"default": "http",
"title": "Protocol",
"type": "string",
"x-description": "The protocol to use, e.g., http or https.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Protocol"
},
"timeout": {
"anyOf": [
{
"type": "number"
},
{
"type": "null"
}
],
"default": 30.0,
"title": "Timeout",
"x-description": "Connection timeout in seconds.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Connection Timeout"
},
"base_path": {
"default": "/",
"title": "Base Path",
"type": "string"
}
},
"required": [
"host"
],
"title": "HttpApi",
"type": "object",
"x-description": "HTTP API Configuration.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "HTTP API",
"x-type": "OpenAICompatibleEmbeddingsRestAPI"
},
"ServiceAPI_HttpApi_": {
"properties": {
"internal_url": {
"anyOf": [
{
"$ref": "#/$defs/HttpApi"
},
{
"type": "null"
}
],
"default": null,
"x-description": "Internal URL to access the service. This route is not protected by platform authorization and only workloads from the same project can access it.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Internal URL"
},
"external_url": {
"anyOf": [
{
"$ref": "#/$defs/HttpApi"
},
{
"type": "null"
}
],
"default": null,
"x-description": "External URL for accessing the service from outside the cluster. This route might be secured by platform authorization and is accessible from any network with a valid platform authorization token that has appropriate permissions.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "External URL"
}
},
"title": "ServiceAPI[HttpApi]",
"type": "object",
"x-description": "Service APIs URLs.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Service APIs",
"x-type": "ServiceAPI[GrpcAPI]"
},
"ServiceAPI_WebApp_": {
"properties": {
"internal_url": {
"anyOf": [
{
"$ref": "#/$defs/WebApp"
},
{
"type": "null"
}
],
"default": null,
"x-description": "Internal URL to access the service. This route is not protected by platform authorization and only workloads from the same project can access it.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Internal URL"
},
"external_url": {
"anyOf": [
{
"$ref": "#/$defs/WebApp"
},
{
"type": "null"
}
],
"default": null,
"x-description": "External URL for accessing the service from outside the cluster. This route might be secured by platform authorization and is accessible from any network with a valid platform authorization token that has appropriate permissions.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "External URL"
}
},
"title": "ServiceAPI[WebApp]",
"type": "object",
"x-description": "Service APIs URLs.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Service APIs",
"x-type": "ServiceAPI[GrpcAPI]"
},
"WebApp": {
"properties": {
"host": {
"title": "Host",
"type": "string",
"x-description": "The hostname of the HTTP endpoint.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Hostname"
},
"port": {
"default": 80,
"exclusiveMinimum": 0,
"title": "Port",
"type": "integer",
"x-description": "The port of the HTTP endpoint.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Port"
},
"protocol": {
"default": "http",
"title": "Protocol",
"type": "string",
"x-description": "The protocol to use, e.g., http or https.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Protocol"
},
"timeout": {
"anyOf": [
{
"type": "number"
},
{
"type": "null"
}
],
"default": 30.0,
"title": "Timeout",
"x-description": "Connection timeout in seconds.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "Connection Timeout"
},
"base_path": {
"default": "/",
"title": "Base Path",
"type": "string"
},
"api_type": {
"const": "webapp",
"default": "webapp",
"title": "Api Type",
"type": "string"
}
},
"required": [
"host"
],
"title": "WebApp",
"type": "object",
"x-description": "HTTP API Configuration.",
"x-is-advanced-field": false,
"x-meta-type": "inline",
"x-title": "HTTP API",
"x-type": "OpenAICompatibleEmbeddingsRestAPI"
}
},
"description": "LightRAG outputs.",
"properties": {
"app_url": {
"anyOf": [
{
"$ref": "#/$defs/ServiceAPI_WebApp_"
},
{
"type": "null"
}
],
"default": null,
"description": "The main application URL for accessing the service. This is the primary endpoint users should use to access the application.",
"title": "Application URL"
},
"server_url": {
"anyOf": [
{
"$ref": "#/$defs/ServiceAPI_HttpApi_"
},
{
"type": "null"
}
],
"default": null
}
},
"title": "LightRAGAppOutputs",
"type": "object"
}

View file

@ -0,0 +1,426 @@
from typing import Literal
from pydantic import BaseModel, ConfigDict, Field, field_validator
from apolo_app_types import AppInputs, AppOutputs
from apolo_app_types.protocols.common import (
IngressHttp,
Preset,
SchemaExtraMetadata,
SchemaMetaType,
)
from apolo_app_types.protocols.common.networking import HttpApi, RestAPI, ServiceAPI
from apolo_app_types.protocols.common.openai_compat import (
OpenAICompatChatAPI,
OpenAICompatEmbeddingsAPI,
)
from apolo_app_types.protocols.postgres import CrunchyPostgresUserCredentials
class LightRAGPersistence(BaseModel):
    """Persistent-volume sizing for LightRAG's RAG data and input files.

    Sizes are expressed in gibibytes; the inputs processor renders them
    as Kubernetes quantities ("<n>Gi").
    """

    model_config = ConfigDict(
        protected_namespaces=(),
        json_schema_extra=SchemaExtraMetadata(
            title="LightRAG Persistence",
            description="Configure persistent storage for LightRAG data and inputs.",
        ).as_json_schema_extra(),
    )
    rag_storage_size: int = Field(
        default=10,
        gt=0,
        json_schema_extra=SchemaExtraMetadata(
            title="RAG Storage Size (GB)",
            description="Size of the persistent volume for RAG data storage.",
        ).as_json_schema_extra(),
    )
    inputs_storage_size: int = Field(
        default=5,
        gt=0,
        json_schema_extra=SchemaExtraMetadata(
            title="Inputs Storage Size (GB)",
            description="Size of the persistent volume for input files.",
        ).as_json_schema_extra(),
    )

    @field_validator("rag_storage_size", "inputs_storage_size", mode="before")
    @classmethod
    def validate_storage_size(cls, value: int) -> int:
        """Reject integer sizes below 1 GB before field coercion.

        Values of exactly 1 are accepted by this check (``value < 1``), so
        the message says "at least 1GB" (the original text incorrectly said
        "greater than 1GB"). Zero/falsy values pass through here and are
        caught by the fields' ``gt=0`` constraint instead.
        """
        if value and isinstance(value, int) and value < 1:
            error_message = "Storage size must be at least 1GB."
            raise ValueError(error_message)
        return value
class OpenAILLMProvider(RestAPI):
    """OpenAI LLM provider configuration."""

    model_config = ConfigDict(
        protected_namespaces=(),
        json_schema_extra=SchemaExtraMetadata(
            title="OpenAI LLM Provider",
            description="OpenAI chat completion API configuration.",
            meta_type=SchemaMetaType.INLINE,
        ).as_json_schema_extra(),
    )
    host: str = Field(
        default="api.openai.com",
        json_schema_extra=SchemaExtraMetadata(
            title="Host",
            description="OpenAI API host",
        ).as_json_schema_extra(),
    )
    port: int = Field(
        default=443,
        json_schema_extra=SchemaExtraMetadata(
            title="Port",
            description="Set the port.",
        ).as_json_schema_extra(),
    )
    protocol: Literal["https"] = "https"
    timeout: int | None = Field(
        default=60,
        json_schema_extra=SchemaExtraMetadata(
            title="Timeout",
            description="Set the connection timeout in seconds.",
        ).as_json_schema_extra(),
    )
    base_path: str = "/v1"
    # Literal tag used by the inputs processor to select the "openai" binding.
    provider: Literal["openai"] = "openai"
    model: str = Field(
        default="gpt-4o-mini",
        json_schema_extra=SchemaExtraMetadata(
            title="Model",
            description="Chat completion model name.",
        ).as_json_schema_extra(),
    )
    api_key: str = Field(
        default="",
        json_schema_extra=SchemaExtraMetadata(
            title="API Key",
            description="OpenAI API key.",
        ).as_json_schema_extra(),
    )
class AnthropicLLMProvider(RestAPI):
    """Anthropic LLM provider configuration."""

    model_config = ConfigDict(
        protected_namespaces=(),
        json_schema_extra=SchemaExtraMetadata(
            title="Anthropic LLM Provider",
            description="Anthropic Claude API configuration.",
            meta_type=SchemaMetaType.INLINE,
        ).as_json_schema_extra(),
    )
    host: str = Field(
        default="api.anthropic.com",
        json_schema_extra=SchemaExtraMetadata(
            title="Host",
            description="Anthropic API host",
        ).as_json_schema_extra(),
    )
    port: int = Field(
        default=443,
        json_schema_extra=SchemaExtraMetadata(
            title="Port",
            description="Set the port.",
        ).as_json_schema_extra(),
    )
    protocol: Literal["https"] = "https"
    timeout: int | None = Field(
        default=60,
        json_schema_extra=SchemaExtraMetadata(
            title="Timeout",
            description="Set the connection timeout in seconds.",
        ).as_json_schema_extra(),
    )
    base_path: str = "/v1"
    # Literal tag used by the inputs processor to select the "anthropic" binding.
    provider: Literal["anthropic"] = "anthropic"
    model: str = Field(
        default="claude-3-5-sonnet-20241022",
        json_schema_extra=SchemaExtraMetadata(
            title="Model",
            description="Anthropic Claude model name.",
        ).as_json_schema_extra(),
    )
    api_key: str = Field(
        default="",
        json_schema_extra=SchemaExtraMetadata(
            title="API Key",
            description="Anthropic API key.",
        ).as_json_schema_extra(),
    )
class OllamaLLMProvider(RestAPI):
    """Ollama LLM provider configuration."""

    model_config = ConfigDict(
        protected_namespaces=(),
        json_schema_extra=SchemaExtraMetadata(
            title="Ollama LLM Provider",
            description="Configuration for a self-hosted Ollama server.",
            meta_type=SchemaMetaType.INLINE,
        ).as_json_schema_extra(),
    )
    # Host has no default: a self-hosted Ollama endpoint must be supplied.
    host: str = Field(
        json_schema_extra=SchemaExtraMetadata(
            title="Host",
            description="Ollama server host.",
        ).as_json_schema_extra(),
    )
    port: int = Field(
        default=11434,
        json_schema_extra=SchemaExtraMetadata(
            title="Port",
            description="Ollama server port.",
        ).as_json_schema_extra(),
    )
    protocol: Literal["http", "https"] = Field(
        default="http",
        json_schema_extra=SchemaExtraMetadata(
            title="Protocol",
            description="Ollama server protocol.",
        ).as_json_schema_extra(),
    )
    timeout: int | None = Field(
        default=300,
        json_schema_extra=SchemaExtraMetadata(
            title="Timeout",
            description="Configure connection timeout in seconds.",
        ).as_json_schema_extra(),
    )
    base_path: str = "/api"
    # Literal tag used by the inputs processor to select the "ollama" binding.
    # Note: no api_key field — the processor sends api_key=None for Ollama.
    provider: Literal["ollama"] = "ollama"
    model: str = Field(
        default="llama3.1:8b-instruct-q4_0",
        json_schema_extra=SchemaExtraMetadata(
            title="Model",
            description="Ollama model name.",
        ).as_json_schema_extra(),
    )
class GeminiLLMProvider(RestAPI):
    """Google Gemini LLM provider configuration."""

    model_config = ConfigDict(
        protected_namespaces=(),
        json_schema_extra=SchemaExtraMetadata(
            title="Gemini LLM Provider",
            description="Google Gemini API configuration.",
            meta_type=SchemaMetaType.INLINE,
        ).as_json_schema_extra(),
    )
    host: str = Field(
        default="generativelanguage.googleapis.com",
        json_schema_extra=SchemaExtraMetadata(
            title="Host",
            description="Google AI API host",
        ).as_json_schema_extra(),
    )
    port: int = Field(
        default=443,
        json_schema_extra=SchemaExtraMetadata(
            title="Port",
            description="Set the port.",
        ).as_json_schema_extra(),
    )
    protocol: Literal["https"] = "https"
    timeout: int | None = Field(
        default=60,
        json_schema_extra=SchemaExtraMetadata(
            title="Timeout",
            description="Configure connection timeout in seconds.",
        ).as_json_schema_extra(),
    )
    base_path: str = "/v1"
    # Literal tag used by the inputs processor to select the "gemini" binding.
    provider: Literal["gemini"] = "gemini"
    model: str = Field(
        default="gemini-1.5-flash",
        json_schema_extra=SchemaExtraMetadata(
            title="Model",
            description="Google Gemini model name.",
        ).as_json_schema_extra(),
    )
    api_key: str = Field(
        default="",
        json_schema_extra=SchemaExtraMetadata(
            title="API Key",
            description="Google AI API key.",
        ).as_json_schema_extra(),
    )
# Union of every supported chat-LLM configuration shape accepted by
# LightRAGAppInputs.llm_config.
LLMProvider = (
    OpenAICompatChatAPI
    | OpenAILLMProvider
    | AnthropicLLMProvider
    | OllamaLLMProvider
    | GeminiLLMProvider
)
class OpenAIEmbeddingProvider(RestAPI):
    """OpenAI embedding provider configuration."""

    model_config = ConfigDict(
        protected_namespaces=(),
        json_schema_extra=SchemaExtraMetadata(
            title="OpenAI Embedding Provider",
            description="OpenAI embeddings API configuration.",
            meta_type=SchemaMetaType.INLINE,
        ).as_json_schema_extra(),
    )
    host: str = Field(
        default="api.openai.com",
        json_schema_extra=SchemaExtraMetadata(
            title="Host",
            description="OpenAI API host",
        ).as_json_schema_extra(),
    )
    port: int = Field(
        default=443,
        json_schema_extra=SchemaExtraMetadata(
            title="Port",
            description="Set the port.",
        ).as_json_schema_extra(),
    )
    protocol: Literal["https"] = "https"
    timeout: int | None = Field(
        default=60,
        json_schema_extra=SchemaExtraMetadata(
            title="Timeout",
            description="Set the connection timeout in seconds.",
        ).as_json_schema_extra(),
    )
    base_path: str = "/v1"
    # Literal tag used by the inputs processor to select the "openai" binding.
    provider: Literal["openai"] = "openai"
    model: str = Field(
        default="text-embedding-ada-002",
        json_schema_extra=SchemaExtraMetadata(
            title="Model",
            description="Embedding model name.",
        ).as_json_schema_extra(),
    )
    api_key: str = Field(
        default="",
        json_schema_extra=SchemaExtraMetadata(
            title="API Key",
            description="OpenAI API key.",
        ).as_json_schema_extra(),
    )
class OllamaEmbeddingProvider(RestAPI):
    """Ollama embedding provider configuration."""

    model_config = ConfigDict(
        protected_namespaces=(),
        json_schema_extra=SchemaExtraMetadata(
            title="Ollama Embedding Provider",
            description="Ollama local embedding model configuration.",
            meta_type=SchemaMetaType.INLINE,
        ).as_json_schema_extra(),
    )
    # Host has no default: a self-hosted Ollama endpoint must be supplied.
    host: str = Field(
        json_schema_extra=SchemaExtraMetadata(
            title="Host",
            description="Ollama server host.",
        ).as_json_schema_extra(),
    )
    port: int = Field(
        default=11434,
        json_schema_extra=SchemaExtraMetadata(
            title="Port",
            description="Ollama server port.",
        ).as_json_schema_extra(),
    )
    protocol: Literal["http", "https"] = Field(
        default="http",
        json_schema_extra=SchemaExtraMetadata(
            title="Protocol",
            description="Ollama server protocol.",
        ).as_json_schema_extra(),
    )
    timeout: int | None = Field(
        default=300,
        json_schema_extra=SchemaExtraMetadata(
            title="Timeout",
            description="Configure connection timeout in seconds.",
        ).as_json_schema_extra(),
    )
    base_path: str = "/api"
    # Literal tag used by the inputs processor to select the "ollama" binding.
    provider: Literal["ollama"] = "ollama"
    model: str = Field(
        default="nomic-embed-text",
        json_schema_extra=SchemaExtraMetadata(
            title="Model",
            description="Ollama embedding model name.",
        ).as_json_schema_extra(),
    )
# Union of every supported embedding configuration shape accepted by
# LightRAGAppInputs.embedding_config.
EmbeddingProvider = (
    OpenAICompatEmbeddingsAPI | OpenAIEmbeddingProvider | OllamaEmbeddingProvider
)

# Aliases used by the app inputs model below.
LightRAGLLMConfig = LLMProvider
LightRAGEmbeddingConfig = EmbeddingProvider
class LightRAGAppInputs(AppInputs):
    """Top-level inputs for deploying LightRAG as an Apolo app."""

    # Compute preset and HTTP ingress are required platform inputs.
    preset: Preset
    ingress_http: IngressHttp
    # Crunchy Postgres (pgvector) credentials used for KV/vector/doc-status
    # storage by the inputs processor.
    pgvector_user: CrunchyPostgresUserCredentials
    llm_config: LightRAGLLMConfig = Field(
        default=OpenAICompatChatAPI(host="", port=443, protocol="https"),
        json_schema_extra=SchemaExtraMetadata(
            title="LLM Configuration",
            description="LLM provider configuration.",
        ).as_json_schema_extra(),
    )
    embedding_config: LightRAGEmbeddingConfig = Field(
        default=OpenAICompatEmbeddingsAPI(host="", port=443, protocol="https"),
        json_schema_extra=SchemaExtraMetadata(
            title="Embedding Configuration",
            description="Embedding provider configuration.",
        ).as_json_schema_extra(),
    )
    persistence: LightRAGPersistence = Field(
        default_factory=LightRAGPersistence,
        json_schema_extra=SchemaExtraMetadata(
            title="Persistence Configuration",
            description="Configure persistent storage for LightRAG data and inputs.",
        ).as_json_schema_extra(),
    )
class LightRAGAppOutputs(AppOutputs):
    """LightRAG outputs."""

    # Internal/external URLs of the LightRAG HTTP API, populated by the
    # outputs processor after deployment; None when not discoverable.
    server_url: ServiceAPI[HttpApi] | None = None
__all__ = [
"LightRAGAppInputs",
"LightRAGAppOutputs",
"LightRAGEmbeddingConfig",
"LightRAGLLMConfig",
"LightRAGPersistence",
"OpenAILLMProvider",
"AnthropicLLMProvider",
"OllamaLLMProvider",
"GeminiLLMProvider",
"OpenAIEmbeddingProvider",
"OllamaEmbeddingProvider",
]

0
.apolo/tests/__init__.py Normal file
View file

6
.apolo/tests/conftest.py Normal file
View file

@ -0,0 +1,6 @@
import pytest

# Load the shared Apolo app-types fixture plugins (mock platform clients
# and common constants) for all tests in this package.
pytest_plugins = [
    "apolo_app_types_fixtures.apolo_clients",
    "apolo_app_types_fixtures.constants",
]

View file

View file

@ -23,7 +23,7 @@ LLM_BINDING_API_KEY=sk-your-openai-api-key-here
# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
# EMBEDDING_BINDING_API_KEY=sk-your-openai-api-key-here
EMBEDDING_BINDING=openai
EMBEDDING_MODEL=gemini-embedding-001
EMBEDDING_MODEL=gemini-embedding-001
EMBEDDING_DIM=3072
EMBEDDING_BINDING_HOST=https://generativelanguage.googleapis.com/v1beta/openai/
EMBEDDING_BINDING_API_KEY=AI-your-gemini-api-key-here

17
.github/actionlint-matcher.json vendored Normal file
View file

@ -0,0 +1,17 @@
{
"problemMatcher": [
{
"owner": "actionlint",
"pattern": [
{
"code": 5,
"column": 3,
"file": 1,
"line": 2,
"message": 4,
"regexp": "^(?:\\x1b\\[\\d+m)?(.+?)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*: (?:\\x1b\\[\\d+m)*(.+?)(?:\\x1b\\[\\d+m)* \\[(.+?)\\]$"
}
]
}
]
}

106
.github/workflows/ci.yaml vendored Normal file
View file

@ -0,0 +1,106 @@
name: CI
on:
push:
branches: [master]
tags: ["v*"]
pull_request:
branches: [master]
schedule:
- cron: 0 4 * * *
jobs:
test:
name: All checks are passed
uses: ./.github/workflows/test.yaml
secrets: inherit
approve:
name: Approve bot PR
runs-on: ubuntu-latest
if: endsWith(github.actor, '[bot]')
needs: test
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout
uses: actions/checkout@v4
- name: metadata
id: metadata
if: github.actor == 'dependabot[bot]'
uses: dependabot/fetch-metadata@v2
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Enable auto-merge for bot PRs
run: gh pr merge --auto --squash --delete-branch "$PR_URL"
env:
PR_URL: ${{ github.event.pull_request.html_url }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
release-processor-image:
name: Release processor image
runs-on: ubuntu-latest
needs: test
if: github.event_name == 'push' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/master')
steps:
- name: Checkout commit
uses: actions/checkout@v4
- name: Purge old artifacts
uses: kolpav/purge-artifacts-action@v1
with:
token: ${{ github.token }}
expire-in: 30mins
- name: Login to ghcr.io
uses: docker/login-action@v3.5.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ github.token }}
- name: Build development image
run: |
make build-hook-image
- name: Release development image
run: |
export IMAGE_TAG=development
make push-hook-image
- name: Release prod image
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
run: |
export IMAGE_TAG=${GITHUB_REF#refs/tags/v}
make push-hook-image
release-processor-image-pr:
name: Release processor image from PR
runs-on: ubuntu-latest
needs: test
if: github.event_name == 'pull_request'
steps:
- name: Checkout commit
uses: actions/checkout@v4
- name: Purge old artifacts
uses: kolpav/purge-artifacts-action@v1
with:
token: ${{ github.token }}
expire-in: 30mins
- name: Login to ghcr.io
uses: docker/login-action@v3.5.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ github.token }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/neuro-inc/app-lightrag
tags: |
type=ref,event=pr
- name: Build Docker image
run: make build-hook-image
- name: Push release for testing
if: github.actor != 'dependabot[bot]'
run: |
FULL_IMAGE_NAME=${{ steps.meta.outputs.tags }}
export IMAGE_TAG=${FULL_IMAGE_NAME##*:}
make push-hook-image

View file

@ -24,7 +24,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pre-commit
pip install pre-commit apolo-app-types
- name: Run pre-commit
run: pre-commit run --all-files --show-diff-on-failure

74
.github/workflows/test.yaml vendored Normal file
View file

@ -0,0 +1,74 @@
on:
  workflow_call: {}
jobs:
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
    - name: Checkout commit
      uses: actions/checkout@v4
    - name: Install python
      uses: actions/setup-python@v5
      with:
        python-version: 3.12
    - name: Cache pre-commit hooks
      uses: actions/cache@v4
      with:
        path: ~/.cache/pre-commit
        key: pre-commit|py3.12|${{ hashFiles('.pre-commit-config.yaml') }}
    - name: Install Poetry
      run: pip install poetry
    - name: Setup Python dependencies cache
      uses: actions/cache@v4
      with:
        path: ~/.cache/pypoetry
        key: poetry-${{ hashFiles('**/poetry.lock') }}
        restore-keys: |
          poetry-
    - name: Install dependencies
      run: make install
    - name: Add local venv to PATH
      run: echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
    - name: Lint
      run: |
        echo "::add-matcher::.github/actionlint-matcher.json"
        make lint
  test-unit:
    name: Unit tests
    runs-on: ubuntu-latest
    steps:
    - name: Checkout commit
      uses: actions/checkout@v4
    - name: Install python
      uses: actions/setup-python@v5
      with:
        python-version-file: .python-version
        cache: pip
    - name: Install Poetry
      run: pip install poetry
    - name: Setup Python dependencies cache
      uses: actions/cache@v4
      with:
        path: ~/.cache/pypoetry
        key: poetry-${{ hashFiles('**/poetry.lock') }}
        restore-keys: |
          poetry-
    - name: Install dependencies
      run: make setup
    - name: Add local venv to PATH
      run: echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
    - name: Run unit tests
      run: make test-unit
  check:
    name: Check
    # Fixed: test-unit was missing from needs, so unit-test failures
    # did not fail the aggregating check job.
    needs:
    - lint
    - test-unit
    runs-on: ubuntu-latest
    if: always()
    steps:
    - name: Decide whether the needed jobs succeeded or failed
      uses: re-actors/alls-green@release/v1
      with:
        jobs: ${{ toJSON(needs) }}

View file

@ -26,3 +26,23 @@ repos:
- id: check-manifest
stages: [manual]
exclude: ^lightrag/api/webui/
- repo: https://github.com/mrtazz/checkmake.git
rev: 0.2.2
hooks:
- id: checkmake
- repo: https://github.com/gruntwork-io/pre-commit
rev: v0.1.30
hooks:
- id: helmlint
- repo: local
hooks:
- id: generate-types-schemas
name: Generate types schemas
entry: make -f .apolo/scripts/gen_types_schemas.mk gen-types-schemas
language: system

1
.python-version Normal file
View file

@ -0,0 +1 @@
3.11

View file

@ -7,16 +7,7 @@ CHART_PACKAGE_DIR := dist/charts
HELM_REGISTRY := ghcr.io/neuro-inc/helm-charts
RAW_VERSION := $(if $(VERSION),$(VERSION),$(shell git describe --tags --always --dirty 2>/dev/null))
SANITIZED_VERSION := $(shell RAW="$(RAW_VERSION)" python - <<'PY'
import os, re
raw = os.environ.get("RAW", "").strip()
if not raw:
raw = "0.0.0"
raw = raw.lstrip("v")
sanitized = re.sub(r"[^0-9A-Za-z\\.\\-]", "-", raw)
print(sanitized or "0.0.0")
PY
)
SANITIZED_VERSION := $(shell python -c 'import re; raw = "$(RAW_VERSION)".strip(); raw = raw[1:] if raw.startswith("v") else raw; raw = raw or "0.0.0"; sanitized = re.sub(r"[^0-9A-Za-z.\-]", "-", raw); print(sanitized or "0.0.0")')
CHART_VERSION := $(SANITIZED_VERSION)
CHART_PACKAGE := $(CHART_PACKAGE_DIR)/$(CHART_NAME)-$(CHART_VERSION).tgz

View file

@ -8,7 +8,7 @@ Advanced script to load markdown documentation into LightRAG with flexible refer
# Default mode (file path references)
python load_docs.py /path/to/your/docs
# URL mode (website link references)
# URL mode (website link references)
python load_docs.py /path/to/docs --mode urls --base-url https://docs.example.com/
```
@ -28,7 +28,7 @@ python load_docs.py docs/ --mode files
- [KG] administration/setup.md
```
### URLs Mode
### URLs Mode
Uses website URLs in query response citations:
```bash
python load_docs.py docs/ --mode urls --base-url https://docs.apolo.us/index/
@ -37,7 +37,7 @@ python load_docs.py docs/ --mode urls --base-url https://my-docs.com/v1/
**Query Response Example:**
```
### References
### References
- [DC] https://docs.apolo.us/index/getting-started/installation
- [KG] https://docs.apolo.us/index/administration/setup
```
@ -68,7 +68,7 @@ docs/
```bash
# Visit your docs site and note the URL patterns:
# https://docs.example.com/getting-started/installation
# https://docs.example.com/api/authentication
# https://docs.example.com/api/authentication
# https://docs.example.com/guides/deployment
```
@ -80,7 +80,7 @@ mkdir -p docs/{getting-started,api,guides}
**Step 3: Organize Your Markdown Files**
```bash
# Match each URL to a file path:
docs/getting-started/installation.md # → /getting-started/installation
docs/getting-started/installation.md # → /getting-started/installation
docs/api/authentication.md # → /api/authentication
docs/guides/deployment.md # → /guides/deployment
docs/guides/README.md # → /guides (overview page)
@ -109,7 +109,7 @@ curl -I https://docs.example.com/api/authentication
apolo-docs/
├── getting-started/
│ ├── first-steps/
│ │ ├── getting-started.md → /index/getting-started/first-steps/getting-started
│ │ ├── getting-started.md → /index/getting-started/first-steps/getting-started
│ │ └── README.md → /index/getting-started/first-steps
│ ├── apolo-base-docker-image.md → /index/getting-started/apolo-base-docker-image
│ └── faq.md → /index/getting-started/faq
@ -135,14 +135,14 @@ python load_docs.py docs/ --endpoint https://lightrag.example.com
# Load to local instance, skip test query
python load_docs.py docs/ --no-test
# Files mode with custom endpoint
# Files mode with custom endpoint
python load_docs.py docs/ --mode files --endpoint http://localhost:9621
```
## Features
- **Dual Reference Modes**: File paths or live website URLs in citations
- **Flexible Base URL**: Works with any documentation site structure
- **Flexible Base URL**: Works with any documentation site structure
- **Simple dependency**: Only requires `httpx` and Python standard library
- **Automatic discovery**: Finds all `.md` files recursively
- **Smart metadata**: Adds appropriate title, path/URL, and source information
@ -171,4 +171,4 @@ This loader is perfect for:
pip install httpx
```
**Note**: This script is included with LightRAG deployments and provides a simple way to load any markdown documentation into your LightRAG instance.
**Note**: This script is included with LightRAG deployments and provides a simple way to load any markdown documentation into your LightRAG instance.

22
hooks.Dockerfile Normal file
View file

@ -0,0 +1,22 @@
FROM python:3.12-slim
LABEL org.opencontainers.image.source="https://github.com/neuro-inc/LightRAG"

# No .pyc files, unbuffered logs, and let Poetry install into the system
# site-packages (no virtualenv inside the container).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_CREATE=0

RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Fixed: with multiple COPY sources the destination must be a directory
# ending with "/"; a bare "." is rejected by the classic builder.
COPY README.md poetry.lock pyproject.toml ./

# Install third-party dependencies first so this layer is cached until the
# lockfile changes; the project sources are copied and installed afterwards.
RUN pip --no-cache-dir install poetry && poetry install --no-root --no-cache

COPY .apolo .apolo
RUN poetry install --only-root --no-cache

ENTRYPOINT ["app-types"]

View file

@ -14,8 +14,8 @@ from typing import Dict, List, Optional
async def load_document_to_lightrag(
content: str,
title: str,
content: str,
title: str,
doc_url: str,
endpoint: str = "http://localhost:9621",
headers: Optional[Dict[str, str]] = None
@ -34,7 +34,7 @@ async def load_document_to_lightrag(
"file_source": doc_url
}
)
if response.status_code == 200:
print(f"✅ Loaded: {title}")
return True
@ -47,7 +47,7 @@ async def load_document_to_lightrag(
except:
print(f" Response: {response.text}")
return False
except Exception as e:
print(f"❌ Error loading {title}: {e}")
return False
@ -58,27 +58,27 @@ def convert_file_path_to_url(relative_path: str, base_url: str) -> str:
# Ensure base URL ends with /
if not base_url.endswith('/'):
base_url += '/'
# Handle special cases
if relative_path in ["README.md", "SUMMARY.md"]:
return base_url.rstrip('/')
# Remove .md extension and convert path
url_path = relative_path.replace(".md", "")
# Handle README files in subdirectories - they map to the directory URL
if url_path.endswith("/README"):
url_path = url_path[:-7] # Remove "/README"
# Clean up any double slashes
url_path = url_path.strip("/")
return f"{base_url}{url_path}"
def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = None) -> List[tuple]:
"""Load all markdown files from directory structure
Args:
docs_path: Path to documentation directory
mode: 'files' for file paths, 'urls' for URL references
@ -86,42 +86,42 @@ def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = No
"""
if not docs_path.exists():
raise FileNotFoundError(f"Documentation directory not found: {docs_path}")
if mode == "urls" and not base_url:
raise ValueError("base_url is required when mode is 'urls'")
# Find all markdown files, excluding SUMMARY.md as it's just the table of contents
md_files = [f for f in docs_path.rglob("*.md") if f.name != "SUMMARY.md"]
print(f"📚 Found {len(md_files)} markdown files")
print(f"🔧 Mode: {mode}")
if mode == "urls":
print(f"🌐 Base URL: {base_url}")
documents = []
for file_path in md_files:
try:
# Load content
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read().strip()
if not content:
continue
# Generate title from filename
title = file_path.stem.replace("-", " ").replace("_", " ").title()
if title.lower() == "readme":
# Use parent directory name for README files
title = f"{file_path.parent.name.replace('-', ' ').replace('_', ' ').title()} Overview"
# Get relative path for metadata
relative_path = str(file_path.relative_to(docs_path))
if mode == "files":
# Use file path as reference
reference = relative_path
source_info = f"File: {file_path.name}"
# Prepare content with file metadata
content_with_metadata = f"""
Title: {title}
@ -134,7 +134,7 @@ Source: {source_info}
# Convert file path to documentation URL
reference = convert_file_path_to_url(relative_path, base_url)
source_info = f"Documentation Site"
# Prepare content with URL metadata
content_with_metadata = f"""
Title: {title}
@ -143,13 +143,13 @@ Source: {source_info}
{content}
"""
documents.append((content_with_metadata, title, reference))
except Exception as e:
print(f"⚠️ Error processing {file_path}: {e}")
continue
return documents
@ -189,7 +189,7 @@ async def test_query(
headers=request_headers,
json={"query": "What is this documentation about?", "mode": "local"}
)
if response.status_code == 200:
result = response.json()
print(f"✅ Query successful!")
@ -202,7 +202,7 @@ async def test_query(
print(f" Error details: {error_detail}")
except:
print(f" Response: {response.text}")
except Exception as e:
print(f"❌ Query error: {e}")
@ -216,22 +216,22 @@ async def main():
Examples:
# Load with file path references (default mode)
python load_docs.py ../apolo-copilot/docs/official-apolo-documentation/docs
# Load with URL references
python load_docs.py docs/ --mode urls --base-url https://docs.apolo.us/index/
# Load Apolo docs with URL references (common use case)
python load_docs.py ../apolo-copilot/docs/official-apolo-documentation/docs \\
--mode urls --base-url https://docs.apolo.us/index/
# Use custom endpoint
python load_docs.py docs/ --endpoint https://lightrag.example.com
# Load with different documentation base URL
python load_docs.py docs/ --mode urls --base-url https://my-docs.example.com/docs/
"""
)
parser.add_argument(
"docs_path",
nargs="?",
@ -259,7 +259,7 @@ Examples:
action="store_true",
help="Skip test query after loading"
)
args = parser.parse_args()
api_key = os.getenv("LIGHTRAG_API_KEY")
if api_key:
@ -267,7 +267,7 @@ Examples:
else:
auth_headers = None
print(" LIGHTRAG_API_KEY not set, continuing without authentication.")
print("🚀 Loading Documentation into LightRAG")
print("=" * 60)
print(f"📁 Documentation path: {args.docs_path}")
@ -280,12 +280,12 @@ Examples:
sys.exit(1)
print(f"🌐 LightRAG endpoint: {args.endpoint}")
print()
# Test LightRAG connectivity
if not await test_lightrag_health(args.endpoint, headers=auth_headers):
print("❌ Cannot connect to LightRAG. Please ensure it's running and accessible.")
sys.exit(1)
# Load documents
docs_path = Path(args.docs_path).resolve()
try:
@ -293,24 +293,24 @@ Examples:
except (FileNotFoundError, ValueError) as e:
print(f"{e}")
sys.exit(1)
if not documents:
print("❌ No markdown files found to load")
sys.exit(1)
# Calculate statistics
total_content = sum(len(content) for content, _, _ in documents)
avg_content = total_content // len(documents) if documents else 0
print(f"📊 Total content: {total_content:,} characters")
print(f"📊 Average length: {avg_content:,} characters")
# Load documents
successful = 0
failed = 0
print(f"\n🔄 Starting to load documents...")
for i, (content, title, doc_url) in enumerate(documents):
success = await load_document_to_lightrag(
content,
@ -319,23 +319,23 @@ Examples:
args.endpoint,
headers=auth_headers
)
if success:
successful += 1
else:
failed += 1
# Progress update
if (i + 1) % 10 == 0:
print(f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)")
# Small delay to avoid overwhelming the service
await asyncio.sleep(0.3)
print(f"\n✅ Loading complete!")
print(f"📊 Successful: {successful}")
print(f"📊 Failed: {failed}")
# Test query unless disabled
if not args.no_test and successful > 0:
await test_query(args.endpoint, headers=auth_headers)

2377
poetry.lock generated Normal file

File diff suppressed because it is too large Load diff

111
pyproject.toml Normal file
View file

@ -0,0 +1,111 @@
[project]
name = "apolo-apps-lightrag"
# Fixed: "version" was both set statically and listed in "dynamic",
# which PEP 621 forbids; keep the static value.
version = "0.0.1"
description = "Apolo LightRAG application"
authors = [
    {name = "Apolo", email = "dev@apolo.us"}
]
readme = "README.md"
requires-python = ">=3.11.0,<4.0"

[tool.poetry]
name = "apolo-apps-lightrag"
authors = ["Apolo.us"]
packages = [
    { include = "apolo_apps_lightrag", from = ".apolo/src" },
]

[tool.poetry.dependencies]
apolo-sdk = "^25.7.2"
pydantic = "^2.9.2"
pyyaml = "^6.0.2"
yarl = "^1.18.3"
apolo-app-types = "^25.9.0"

[tool.poetry.group.dev]
optional = true

[tool.poetry.group.dev.dependencies]
pre-commit = "^4.2.0"
types-PyYAML = "^6.0.12.20241230"
pytest = "^8.3.4"
pytest-asyncio = "^0.25.3"
pytest-cov = "^6.2.1"
mypy = "^1.17.1"

[tool.ruff]
target-version = "py311"
lint.select = [
    "E", "F", "I", "C90", "UP", "B", "ASYNC", "N", "FBT", "A", "C4", "EM", "FA", "ICN",
    "G", "PIE", "T20", "PYI", "PT", "RET", "PTH"
]
lint.ignore = [
    "A003",
    "N818"
]

[tool.ruff.lint.isort.sections]
ApoloSDK = ["apolo-sdk"]

[tool.ruff.lint.isort]
combine-as-imports = true
lines-after-imports = 2
section-order = ["future", "standard-library", "third-party", "ApoloSDK", "first-party", "local-folder"]
known-first-party = ["apolo_app_types"]
known-local-folder = ["tests"]

[tool.mypy]
check_untyped_defs = true
disallow_any_generics = true
disallow_untyped_defs = true
follow_imports = "silent"
strict_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_unused_configs = true
plugins = ['pydantic.mypy']
exclude = [
    "tests/"
]

[[tool.mypy.overrides]]
module = "pytest"
ignore_missing_imports = true

[tool.flake8]
extend-exclude = [
    ".git",
    ".env",
    "__pycache__",
    ".eggs",
]
max-line-length = 88
extend-ignore = [
    "N801",
    "N802",
    "N803",
    "E252",
    "W503",
    "E133",
    "E203",
    "F541",
]

[tool.coverage.report]
fail_under = 0
skip_empty = true
sort = "-cover"
omit = [
    # Fixed: "./apolo/..." did not match the actual ".apolo/..." layout
    # used by the package include above.
    ".apolo/tests/*",
]

[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
log_cli = false
log_level = "INFO"
junit_family = "xunit2"
testpaths = [
    # Fixed: same ".apolo" typo as in coverage omit above.
    ".apolo/tests/",
]