From 81dff560f10376283686b20cdf8b6f45919e1012 Mon Sep 17 00:00:00 2001
From: Taddeus <taddeus_b90@hotmail.com>
Date: Mon, 3 Nov 2025 14:21:17 +0200
Subject: [PATCH] Enhance documentation loader and build scripts

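Refactors the documentation loading script (load_docs.py) for readability: sorts imports, modernizes type hints to built-in generics and `X | None` unions, replaces bare `except:` clauses with `except Exception:`, and drops f-string prefixes from strings without placeholders. CLI argument definitions and progress output are reformatted only; no options or messages change.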

Extends the schema generation makefile (.apolo/scripts/gen_types_schemas.mk), which previously defined only the gen-types-schemas target, with all, clean, and test targets, and adds an all target and a placeholder test target to the top-level Helm Makefile for consistency.

Removes the obsolete Ruff lint configuration from pyproject.toml for a streamlined tooling setup, and drops the `pytest` import from .apolo/tests/conftest.py.

These changes improve maintainability and usability of schema generation and documentation loading workflows.

Relates to MLO-469
---
 .apolo/scripts/gen_types_schemas.mk | 13 +++-
 .apolo/tests/conftest.py            |  2 -
 Makefile                            |  7 ++-
 load_docs.py                        | 97 +++++++++++++----------------
 pyproject.toml                      | 21 -------
 5 files changed, 63 insertions(+), 77 deletions(-)

diff --git a/.apolo/scripts/gen_types_schemas.mk b/.apolo/scripts/gen_types_schemas.mk
index 2db786c4..5c747a97 100644
--- a/.apolo/scripts/gen_types_schemas.mk
+++ b/.apolo/scripts/gen_types_schemas.mk
@@ -1,3 +1,14 @@
-.PHONY: gen-types-schemas
+.PHONY: all clean test gen-types-schemas
+
+all: gen-types-schemas
+
 gen-types-schemas:
 	@.apolo/scripts/gen_types_schemas.sh
+
+clean:
+	@rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppInputs.json
+	@rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppOutputs.json
+	@rm -f .apolo/src/apolo_apps_lightrag/types.py
+
+test:
+	@echo "No tests defined for schema generation."
diff --git a/.apolo/tests/conftest.py b/.apolo/tests/conftest.py
index 7fc6a562..e1cee781 100644
--- a/.apolo/tests/conftest.py
+++ b/.apolo/tests/conftest.py
@@ -1,5 +1,3 @@
-import pytest
-
 pytest_plugins = [
     "apolo_app_types_fixtures.apolo_clients",
     "apolo_app_types_fixtures.constants",
diff --git a/Makefile b/Makefile
index b2cad31e..a7b4fc25 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,9 @@ CHART_PACKAGE := $(CHART_PACKAGE_DIR)/$(CHART_NAME)-$(CHART_VERSION).tgz
 
 GITHUB_USERNAME := $(shell echo "$$APOLO_GITHUB_TOKEN" | base64 -d 2>/dev/null | cut -d: -f1 2>/dev/null || echo "oauth2")
 
-.PHONY: help helm-package helm-push clean
+.PHONY: all help helm-package helm-push clean test
+
+all: help
 
 help:
 	@echo "Available targets:"
@@ -51,3 +53,6 @@ clean:
 	@echo "Removing packaged charts..."
 	rm -rf $(CHART_PACKAGE_DIR)
 	@echo "✅ Cleaned"
+
+test:
+	@echo "No automated tests for Helm packaging. Use 'helm test' as needed."
diff --git a/load_docs.py b/load_docs.py
index 7482fdd7..da894eda 100755
--- a/load_docs.py
+++ b/load_docs.py
@@ -4,13 +4,13 @@ Simplified script to load documentation into LightRAG
 Loads all markdown files from a directory structure
 """
 
-import asyncio
-import httpx
 import argparse
-import sys
+import asyncio
 import os
+import sys
 from pathlib import Path
-from typing import Dict, List, Optional
+
+import httpx
 
 
 async def load_document_to_lightrag(
@@ -18,7 +18,7 @@ async def load_document_to_lightrag(
     title: str,
     doc_url: str,
     endpoint: str = "http://localhost:9621",
-    headers: Optional[Dict[str, str]] = None
+    headers: dict[str, str] | None = None,
 ) -> bool:
     """Load a single document to LightRAG with URL reference"""
     try:
@@ -29,24 +29,20 @@ async def load_document_to_lightrag(
             response = await client.post(
                 f"{endpoint}/documents/text",
                 headers=request_headers,
-                json={
-                    "text": content,
-                    "file_source": doc_url
-                }
+                json={"text": content, "file_source": doc_url},
             )
 
             if response.status_code == 200:
                 print(f"✅ Loaded: {title}")
                 return True
-            else:
-                print(f"❌ Failed to load {title}: {response.status_code}")
-                if response.status_code == 500:
-                    try:
-                        error_detail = response.json()
-                        print(f"   Error details: {error_detail}")
-                    except:
-                        print(f"   Response: {response.text}")
-                return False
+            print(f"❌ Failed to load {title}: {response.status_code}")
+            if response.status_code == 500:
+                try:
+                    error_detail = response.json()
+                    print(f"   Error details: {error_detail}")
+                except Exception:
+                    print(f"   Response: {response.text}")
+            return False
 
     except Exception as e:
         print(f"❌ Error loading {title}: {e}")
@@ -56,12 +52,12 @@ async def load_document_to_lightrag(
 def convert_file_path_to_url(relative_path: str, base_url: str) -> str:
     """Convert file path to documentation URL"""
     # Ensure base URL ends with /
-    if not base_url.endswith('/'):
-        base_url += '/'
+    if not base_url.endswith("/"):
+        base_url += "/"
 
     # Handle special cases
     if relative_path in ["README.md", "SUMMARY.md"]:
-        return base_url.rstrip('/')
+        return base_url.rstrip("/")
 
     # Remove .md extension and convert path
     url_path = relative_path.replace(".md", "")
@@ -76,7 +72,9 @@ def convert_file_path_to_url(relative_path: str, base_url: str) -> str:
     return f"{base_url}{url_path}"
 
 
-def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = None) -> List[tuple]:
+def load_markdown_files(
+    docs_path: Path, mode: str = "files", base_url: str = None
+) -> list[tuple]:
     """Load all markdown files from directory structure
 
     Args:
@@ -102,7 +100,7 @@ def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = No
     for file_path in md_files:
         try:
             # Load content
-            with open(file_path, 'r', encoding='utf-8') as f:
+            with open(file_path, encoding="utf-8") as f:
                 content = f.read().strip()
 
             if not content:
@@ -133,7 +131,7 @@ Source: {source_info}
             else:  # urls mode
                 # Convert file path to documentation URL
                 reference = convert_file_path_to_url(relative_path, base_url)
-                source_info = f"Documentation Site"
+                source_info = "Documentation Site"
 
                 # Prepare content with URL metadata
                 content_with_metadata = f"""
@@ -154,8 +152,7 @@ Source: {source_info}
 
 
 async def test_lightrag_health(
-    endpoint: str = "http://localhost:9621",
-    headers: Optional[Dict[str, str]] = None
+    endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None
 ) -> bool:
     """Test if LightRAG is accessible"""
     try:
@@ -165,20 +162,18 @@ async def test_lightrag_health(
                 health_data = response.json()
                 print(f"✅ LightRAG is healthy: {health_data.get('status')}")
                 return True
-            else:
-                print(f"❌ LightRAG health check failed: {response.status_code}")
-                return False
+            print(f"❌ LightRAG health check failed: {response.status_code}")
+            return False
     except Exception as e:
         print(f"❌ Cannot connect to LightRAG: {e}")
         return False
 
 
 async def test_query(
-    endpoint: str = "http://localhost:9621",
-    headers: Optional[Dict[str, str]] = None
+    endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None
 ) -> None:
     """Test a sample query"""
-    print(f"\n🧪 Testing query...")
+    print("\n🧪 Testing query...")
     try:
         async with httpx.AsyncClient(timeout=30.0) as client:
             request_headers = {"Content-Type": "application/json"}
@@ -187,12 +182,12 @@ async def test_query(
             response = await client.post(
                 f"{endpoint}/query",
                 headers=request_headers,
-                json={"query": "What is this documentation about?", "mode": "local"}
+                json={"query": "What is this documentation about?", "mode": "local"},
             )
 
             if response.status_code == 200:
                 result = response.json()
-                print(f"✅ Query successful!")
+                print("✅ Query successful!")
                 print(f"Response: {result['response'][:200]}...")
             else:
                 print(f"❌ Query failed: {response.status_code}")
@@ -200,7 +195,7 @@ async def test_query(
                     try:
                         error_detail = response.json()
                         print(f"   Error details: {error_detail}")
-                    except:
+                    except Exception:
                         print(f"   Response: {response.text}")
 
     except Exception as e:
@@ -229,35 +224,33 @@ Examples:
 
   # Load with different documentation base URL
   python load_docs.py docs/ --mode urls --base-url https://my-docs.example.com/docs/
-"""
+""",
     )
 
     parser.add_argument(
         "docs_path",
         nargs="?",
         default="../apolo-copilot/docs/official-apolo-documentation/docs",
-        help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)"
+        help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)",
     )
     parser.add_argument(
         "--mode",
         choices=["files", "urls"],
         default="files",
-        help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)"
+        help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)",
     )
     parser.add_argument(
         "--base-url",
         dest="base_url",
-        help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/"
+        help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/",
     )
     parser.add_argument(
         "--endpoint",
         default="http://localhost:9621",
-        help="LightRAG endpoint URL (default: http://localhost:9621)"
+        help="LightRAG endpoint URL (default: http://localhost:9621)",
     )
     parser.add_argument(
-        "--no-test",
-        action="store_true",
-        help="Skip test query after loading"
+        "--no-test", action="store_true", help="Skip test query after loading"
     )
 
     args = parser.parse_args()
@@ -283,7 +276,9 @@ Examples:
 
     # Test LightRAG connectivity
     if not await test_lightrag_health(args.endpoint, headers=auth_headers):
-        print("❌ Cannot connect to LightRAG. Please ensure it's running and accessible.")
+        print(
+            "❌ Cannot connect to LightRAG. Please ensure it's running and accessible."
+        )
         sys.exit(1)
 
     # Load documents
@@ -309,15 +304,11 @@ Examples:
     successful = 0
     failed = 0
 
-    print(f"\n🔄 Starting to load documents...")
+    print("\n🔄 Starting to load documents...")
 
     for i, (content, title, doc_url) in enumerate(documents):
         success = await load_document_to_lightrag(
-            content,
-            title,
-            doc_url,
-            args.endpoint,
-            headers=auth_headers
+            content, title, doc_url, args.endpoint, headers=auth_headers
         )
 
         if success:
@@ -327,12 +318,14 @@ Examples:
 
         # Progress update
         if (i + 1) % 10 == 0:
-            print(f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)")
+            print(
+                f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)"
+            )
 
         # Small delay to avoid overwhelming the service
         await asyncio.sleep(0.3)
 
-    print(f"\n✅ Loading complete!")
+    print("\n✅ Loading complete!")
     print(f"📊 Successful: {successful}")
     print(f"📊 Failed: {failed}")
 
diff --git a/pyproject.toml b/pyproject.toml
index 0bac3d88..70289ef3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,27 +34,6 @@ pytest-asyncio = "^0.25.3"
 pytest-cov = "^6.2.1"
 mypy = "^1.17.1"
 
-[tool.ruff]
-target-version = "py311"
-lint.select = [
-    "E", "F", "I", "C90", "UP", "B", "ASYNC", "N", "FBT", "A", "C4", "EM", "FA", "ICN",
-    "G", "PIE", "T20", "PYI", "PT", "RET", "PTH"
-]
-lint.ignore = [
-  "A003",
-  "N818"
-]
-
-[tool.ruff.lint.isort.sections]
-ApoloSDK = ["apolo-sdk"]
-
-[tool.ruff.lint.isort]
-combine-as-imports = true
-lines-after-imports = 2
-section-order = ["future", "standard-library", "third-party", "ApoloSDK", "first-party", "local-folder"]
-known-first-party = ["apolo_app_types"]
-known-local-folder = ["tests"]
-
 [tool.mypy]
 check_untyped_defs = true
 disallow_any_generics = true