Enhance documentation loader and build scripts

Refactors the documentation loading script (load_docs.py) for improved readability, type hinting, and error handling. Updates CLI argument parsing and output formatting for clarity. Replaces the single-target schema generation makefile with a more robust one that adds all, clean, and test targets, and adds a placeholder test target to the Helm build system for consistency. Removes the obsolete Ruff lint configuration from pyproject.toml to streamline the tooling setup. These changes improve the maintainability and usability of the schema generation and documentation loading workflows. Relates to MLO-469.
2025-11-03 14:21:17 +02:00 · 2025-11-03 14:21:17 +02:00 · 81dff560f1
commit 81dff560f1
parent c11e91c614
5 changed files with 63 additions and 77 deletions
--- a/.apolo/scripts/gen_types_schemas.mk
+++ b/.apolo/scripts/gen_types_schemas.mk
@ -1,3 +1,14 @@
-.PHONY: gen-types-schemas
+.PHONY: all clean test gen-types-schemas
+
+all: gen-types-schemas
+
 gen-types-schemas:
 	@.apolo/scripts/gen_types_schemas.sh
+
+clean:
+	@rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppInputs.json
+	@rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppOutputs.json
+	@rm -f .apolo/src/apolo_apps_lightrag/types.py
+
+test:
+	@echo "No tests defined for schema generation."
--- a/.apolo/tests/conftest.py
+++ b/.apolo/tests/conftest.py
@ -1,5 +1,3 @@
-import pytest
-
 pytest_plugins = [
    "apolo_app_types_fixtures.apolo_clients",
    "apolo_app_types_fixtures.constants",
--- a/7
+++ b/7
@ -13,7 +13,9 @@ CHART_PACKAGE := $(CHART_PACKAGE_DIR)/$(CHART_NAME)-$(CHART_VERSION).tgz

 GITHUB_USERNAME := $(shell echo "$$APOLO_GITHUB_TOKEN" | base64 -d 2>/dev/null | cut -d: -f1 2>/dev/null || echo "oauth2")

-.PHONY: help helm-package helm-push clean
+.PHONY: all help helm-package helm-push clean test
+
+all: help

 help:
 	@echo "Available targets:"
@ -51,3 +53,6 @@ clean:
 	@echo "Removing packaged charts..."
 	rm -rf $(CHART_PACKAGE_DIR)
 	@echo "✅ Cleaned"
+
+test:
+	@echo "No automated tests for Helm packaging. Use 'helm test' as needed."
--- a/load_docs.py
+++ b/load_docs.py
@ -4,13 +4,13 @@ Simplified script to load documentation into LightRAG
 Loads all markdown files from a directory structure
 """

-import asyncio
-import httpx
 import argparse
-import sys
+import asyncio
 import os
+import sys
 from pathlib import Path
-from typing import Dict, List, Optional
+
+import httpx


 async def load_document_to_lightrag(
@ -18,7 +18,7 @@ async def load_document_to_lightrag(
    title: str,
    doc_url: str,
    endpoint: str = "http://localhost:9621",
-    headers: Optional[Dict[str, str]] = None
+    headers: dict[str, str] | None = None,
 ) -> bool:
    """Load a single document to LightRAG with URL reference"""
    try:
@ -29,24 +29,20 @@ async def load_document_to_lightrag(
            response = await client.post(
                f"{endpoint}/documents/text",
                headers=request_headers,
-                json={
-                    "text": content,
-                    "file_source": doc_url
-                }
+                json={"text": content, "file_source": doc_url},
            )

            if response.status_code == 200:
                print(f"✅ Loaded: {title}")
                return True
-            else:
-                print(f"❌ Failed to load {title}: {response.status_code}")
-                if response.status_code == 500:
-                    try:
-                        error_detail = response.json()
-                        print(f"   Error details: {error_detail}")
-                    except:
-                        print(f"   Response: {response.text}")
-                return False
+            print(f"❌ Failed to load {title}: {response.status_code}")
+            if response.status_code == 500:
+                try:
+                    error_detail = response.json()
+                    print(f"   Error details: {error_detail}")
+                except Exception:
+                    print(f"   Response: {response.text}")
+            return False

    except Exception as e:
        print(f"❌ Error loading {title}: {e}")
@ -56,12 +52,12 @@ async def load_document_to_lightrag(
 def convert_file_path_to_url(relative_path: str, base_url: str) -> str:
    """Convert file path to documentation URL"""
    # Ensure base URL ends with /
-    if not base_url.endswith('/'):
-        base_url += '/'
+    if not base_url.endswith("/"):
+        base_url += "/"

    # Handle special cases
    if relative_path in ["README.md", "SUMMARY.md"]:
-        return base_url.rstrip('/')
+        return base_url.rstrip("/")

    # Remove .md extension and convert path
    url_path = relative_path.replace(".md", "")
@ -76,7 +72,9 @@ def convert_file_path_to_url(relative_path: str, base_url: str) -> str:
    return f"{base_url}{url_path}"


-def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = None) -> List[tuple]:
+def load_markdown_files(
+    docs_path: Path, mode: str = "files", base_url: str = None
+) -> list[tuple]:
    """Load all markdown files from directory structure

    Args:
@ -102,7 +100,7 @@ def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = No
    for file_path in md_files:
        try:
            # Load content
-            with open(file_path, 'r', encoding='utf-8') as f:
+            with open(file_path, encoding="utf-8") as f:
                content = f.read().strip()

            if not content:
@ -133,7 +131,7 @@ Source: {source_info}
            else:  # urls mode
                # Convert file path to documentation URL
                reference = convert_file_path_to_url(relative_path, base_url)
-                source_info = f"Documentation Site"
+                source_info = "Documentation Site"

                # Prepare content with URL metadata
                content_with_metadata = f"""
@ -154,8 +152,7 @@ Source: {source_info}


 async def test_lightrag_health(
-    endpoint: str = "http://localhost:9621",
-    headers: Optional[Dict[str, str]] = None
+    endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None
 ) -> bool:
    """Test if LightRAG is accessible"""
    try:
@ -165,20 +162,18 @@ async def test_lightrag_health(
                health_data = response.json()
                print(f"✅ LightRAG is healthy: {health_data.get('status')}")
                return True
-            else:
-                print(f"❌ LightRAG health check failed: {response.status_code}")
-                return False
+            print(f"❌ LightRAG health check failed: {response.status_code}")
+            return False
    except Exception as e:
        print(f"❌ Cannot connect to LightRAG: {e}")
        return False


 async def test_query(
-    endpoint: str = "http://localhost:9621",
-    headers: Optional[Dict[str, str]] = None
+    endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None
 ) -> None:
    """Test a sample query"""
-    print(f"\n🧪 Testing query...")
+    print("\n🧪 Testing query...")
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            request_headers = {"Content-Type": "application/json"}
@ -187,12 +182,12 @@ async def test_query(
            response = await client.post(
                f"{endpoint}/query",
                headers=request_headers,
-                json={"query": "What is this documentation about?", "mode": "local"}
+                json={"query": "What is this documentation about?", "mode": "local"},
            )

            if response.status_code == 200:
                result = response.json()
-                print(f"✅ Query successful!")
+                print("✅ Query successful!")
                print(f"Response: {result['response'][:200]}...")
            else:
                print(f"❌ Query failed: {response.status_code}")
@ -200,7 +195,7 @@ async def test_query(
                    try:
                        error_detail = response.json()
                        print(f"   Error details: {error_detail}")
-                    except:
+                    except Exception:
                        print(f"   Response: {response.text}")

    except Exception as e:
@ -229,35 +224,33 @@ Examples:

  # Load with different documentation base URL
  python load_docs.py docs/ --mode urls --base-url https://my-docs.example.com/docs/
-"""
+""",
    )

    parser.add_argument(
        "docs_path",
        nargs="?",
        default="../apolo-copilot/docs/official-apolo-documentation/docs",
-        help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)"
+        help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)",
    )
    parser.add_argument(
        "--mode",
        choices=["files", "urls"],
        default="files",
-        help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)"
+        help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)",
    )
    parser.add_argument(
        "--base-url",
        dest="base_url",
-        help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/"
+        help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/",
    )
    parser.add_argument(
        "--endpoint",
        default="http://localhost:9621",
-        help="LightRAG endpoint URL (default: http://localhost:9621)"
+        help="LightRAG endpoint URL (default: http://localhost:9621)",
    )
    parser.add_argument(
-        "--no-test",
-        action="store_true",
-        help="Skip test query after loading"
+        "--no-test", action="store_true", help="Skip test query after loading"
    )

    args = parser.parse_args()
@ -283,7 +276,9 @@ Examples:

    # Test LightRAG connectivity
    if not await test_lightrag_health(args.endpoint, headers=auth_headers):
-        print("❌ Cannot connect to LightRAG. Please ensure it's running and accessible.")
+        print(
+            "❌ Cannot connect to LightRAG. Please ensure it's running and accessible."
+        )
        sys.exit(1)

    # Load documents
@ -309,15 +304,11 @@ Examples:
    successful = 0
    failed = 0

-    print(f"\n🔄 Starting to load documents...")
+    print("\n🔄 Starting to load documents...")

    for i, (content, title, doc_url) in enumerate(documents):
        success = await load_document_to_lightrag(
-            content,
-            title,
-            doc_url,
-            args.endpoint,
-            headers=auth_headers
+            content, title, doc_url, args.endpoint, headers=auth_headers
        )

        if success:
@ -327,12 +318,14 @@ Examples:

        # Progress update
        if (i + 1) % 10 == 0:
-            print(f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)")
+            print(
+                f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)"
+            )

        # Small delay to avoid overwhelming the service
        await asyncio.sleep(0.3)

-    print(f"\n✅ Loading complete!")
+    print("\n✅ Loading complete!")
    print(f"📊 Successful: {successful}")
    print(f"📊 Failed: {failed}")

--- a/pyproject.toml
+++ b/pyproject.toml
@ -34,27 +34,6 @@ pytest-asyncio = "^0.25.3"
 pytest-cov = "^6.2.1"
 mypy = "^1.17.1"

-[tool.ruff]
-target-version = "py311"
-lint.select = [
-    "E", "F", "I", "C90", "UP", "B", "ASYNC", "N", "FBT", "A", "C4", "EM", "FA", "ICN",
-    "G", "PIE", "T20", "PYI", "PT", "RET", "PTH"
-]
-lint.ignore = [
-  "A003",
-  "N818"
-]
-
-[tool.ruff.lint.isort.sections]
-ApoloSDK = ["apolo-sdk"]
-
-[tool.ruff.lint.isort]
-combine-as-imports = true
-lines-after-imports = 2
-section-order = ["future", "standard-library", "third-party", "ApoloSDK", "first-party", "local-folder"]
-known-first-party = ["apolo_app_types"]
-known-local-folder = ["tests"]
-
 [tool.mypy]
 check_untyped_defs = true
 disallow_any_generics = true