From 81dff560f10376283686b20cdf8b6f45919e1012 Mon Sep 17 00:00:00 2001 From: Taddeus Date: Mon, 3 Nov 2025 14:21:17 +0200 Subject: [PATCH] Enhance documentation loader and build scripts Refactors the documentation loading script for improved readability, type hinting, and error handling. Updates CLI argument parsing and output formatting for clarity. Replaces a simple makefile target with a more robust schema generation makefile including clean and test targets, and adds a placeholder test target to the Helm build system for consistency. Removes obsolete lint configuration for streamlined tooling setup. These changes improve maintainability and usability of schema generation and documentation loading workflows. Relates to MLO-469 --- .apolo/scripts/gen_types_schemas.mk | 13 +++- .apolo/tests/conftest.py | 2 - Makefile | 7 ++- load_docs.py | 97 +++++++++++++---------------- pyproject.toml | 21 ------- 5 files changed, 63 insertions(+), 77 deletions(-) diff --git a/.apolo/scripts/gen_types_schemas.mk b/.apolo/scripts/gen_types_schemas.mk index 2db786c4..5c747a97 100644 --- a/.apolo/scripts/gen_types_schemas.mk +++ b/.apolo/scripts/gen_types_schemas.mk @@ -1,3 +1,14 @@ -.PHONY: gen-types-schemas +.PHONY: all clean test gen-types-schemas + +all: gen-types-schemas + gen-types-schemas: @.apolo/scripts/gen_types_schemas.sh + +clean: + @rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppInputs.json + @rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppOutputs.json + @rm -f .apolo/src/apolo_apps_lightrag/types.py + +test: + @echo "No tests defined for schema generation." diff --git a/.apolo/tests/conftest.py b/.apolo/tests/conftest.py index 7fc6a562..e1cee781 100644 --- a/.apolo/tests/conftest.py +++ b/.apolo/tests/conftest.py @@ -1,5 +1,3 @@ -import pytest - pytest_plugins = [ "apolo_app_types_fixtures.apolo_clients", "apolo_app_types_fixtures.constants", diff --git a/Makefile b/Makefile index b2cad31e..a7b4fc25 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,9 @@ CHART_PACKAGE := $(CHART_PACKAGE_DIR)/$(CHART_NAME)-$(CHART_VERSION).tgz GITHUB_USERNAME := $(shell echo "$$APOLO_GITHUB_TOKEN" | base64 -d 2>/dev/null | cut -d: -f1 2>/dev/null || echo "oauth2") -.PHONY: help helm-package helm-push clean +.PHONY: all help helm-package helm-push clean test + +all: help help: @echo "Available targets:" @@ -51,3 +53,6 @@ clean: @echo "Removing packaged charts..." rm -rf $(CHART_PACKAGE_DIR) @echo "✅ Cleaned" + +test: + @echo "No automated tests for Helm packaging. Use 'helm test' as needed." diff --git a/load_docs.py b/load_docs.py index 7482fdd7..da894eda 100755 --- a/load_docs.py +++ b/load_docs.py @@ -4,13 +4,13 @@ Simplified script to load documentation into LightRAG Loads all markdown files from a directory structure """ -import asyncio -import httpx import argparse -import sys +import asyncio import os +import sys from pathlib import Path -from typing import Dict, List, Optional + +import httpx async def load_document_to_lightrag( @@ -18,7 +18,7 @@ async def load_document_to_lightrag( title: str, doc_url: str, endpoint: str = "http://localhost:9621", - headers: Optional[Dict[str, str]] = None + headers: dict[str, str] | None = None, ) -> bool: """Load a single document to LightRAG with URL reference""" try: @@ -29,24 +29,20 @@ async def load_document_to_lightrag( response = await client.post( f"{endpoint}/documents/text", headers=request_headers, - json={ - "text": content, - "file_source": doc_url - } + json={"text": content, "file_source": doc_url}, ) if response.status_code == 200: print(f"✅ Loaded: {title}") return True - else: - print(f"❌ Failed to load {title}: {response.status_code}") - if response.status_code == 500: - try: - error_detail = response.json() - print(f" Error details: {error_detail}") - except: - print(f" Response: {response.text}") - return False + print(f"❌ Failed to load {title}: {response.status_code}") + if response.status_code == 500: + try: + error_detail = response.json() + print(f" Error details: {error_detail}") + except Exception: + print(f" Response: {response.text}") + return False except Exception as e: print(f"❌ Error loading {title}: {e}") @@ -56,12 +52,12 @@ async def load_document_to_lightrag( def convert_file_path_to_url(relative_path: str, base_url: str) -> str: """Convert file path to documentation URL""" # Ensure base URL ends with / - if not base_url.endswith('/'): - base_url += '/' + if not base_url.endswith("/"): + base_url += "/" # Handle special cases if relative_path in ["README.md", "SUMMARY.md"]: - return base_url.rstrip('/') + return base_url.rstrip("/") # Remove .md extension and convert path url_path = relative_path.replace(".md", "") @@ -76,7 +72,9 @@ def convert_file_path_to_url(relative_path: str, base_url: str) -> str: return f"{base_url}{url_path}" -def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = None) -> List[tuple]: +def load_markdown_files( + docs_path: Path, mode: str = "files", base_url: str = None +) -> list[tuple]: """Load all markdown files from directory structure Args: @@ -102,7 +100,7 @@ def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = No for file_path in md_files: try: # Load content - with open(file_path, 'r', encoding='utf-8') as f: + with open(file_path, encoding="utf-8") as f: content = f.read().strip() if not content: @@ -133,7 +131,7 @@ Source: {source_info} else: # urls mode # Convert file path to documentation URL reference = convert_file_path_to_url(relative_path, base_url) - source_info = f"Documentation Site" + source_info = "Documentation Site" # Prepare content with URL metadata content_with_metadata = f""" @@ -154,8 +152,7 @@ Source: {source_info} async def test_lightrag_health( - endpoint: str = "http://localhost:9621", - headers: Optional[Dict[str, str]] = None + endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None ) -> bool: """Test if LightRAG is accessible""" try: @@ -165,20 +162,18 @@ async def test_lightrag_health( health_data = response.json() print(f"✅ LightRAG is healthy: {health_data.get('status')}") return True - else: - print(f"❌ LightRAG health check failed: {response.status_code}") - return False + print(f"❌ LightRAG health check failed: {response.status_code}") + return False except Exception as e: print(f"❌ Cannot connect to LightRAG: {e}") return False async def test_query( - endpoint: str = "http://localhost:9621", - headers: Optional[Dict[str, str]] = None + endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None ) -> None: """Test a sample query""" - print(f"\n🧪 Testing query...") + print("\n🧪 Testing query...") try: async with httpx.AsyncClient(timeout=30.0) as client: request_headers = {"Content-Type": "application/json"} @@ -187,12 +182,12 @@ async def test_query( response = await client.post( f"{endpoint}/query", headers=request_headers, - json={"query": "What is this documentation about?", "mode": "local"} + json={"query": "What is this documentation about?", "mode": "local"}, ) if response.status_code == 200: result = response.json() - print(f"✅ Query successful!") + print("✅ Query successful!") print(f"Response: {result['response'][:200]}...") else: print(f"❌ Query failed: {response.status_code}") @@ -200,7 +195,7 @@ async def test_query( try: error_detail = response.json() print(f" Error details: {error_detail}") - except: + except Exception: print(f" Response: {response.text}") except Exception as e: @@ -229,35 +224,33 @@ Examples: # Load with different documentation base URL python load_docs.py docs/ --mode urls --base-url https://my-docs.example.com/docs/ -""" +""", ) parser.add_argument( "docs_path", nargs="?", default="../apolo-copilot/docs/official-apolo-documentation/docs", - help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)" + help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)", ) parser.add_argument( "--mode", choices=["files", "urls"], default="files", - help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)" + help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)", ) parser.add_argument( "--base-url", dest="base_url", - help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/" + help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/", ) parser.add_argument( "--endpoint", default="http://localhost:9621", - help="LightRAG endpoint URL (default: http://localhost:9621)" + help="LightRAG endpoint URL (default: http://localhost:9621)", ) parser.add_argument( - "--no-test", - action="store_true", - help="Skip test query after loading" + "--no-test", action="store_true", help="Skip test query after loading" ) args = parser.parse_args() @@ -283,7 +276,9 @@ Examples: # Test LightRAG connectivity if not await test_lightrag_health(args.endpoint, headers=auth_headers): - print("❌ Cannot connect to LightRAG. Please ensure it's running and accessible.") + print( + "❌ Cannot connect to LightRAG. Please ensure it's running and accessible." + ) sys.exit(1) # Load documents @@ -309,15 +304,11 @@ Examples: successful = 0 failed = 0 - print(f"\n🔄 Starting to load documents...") + print("\n🔄 Starting to load documents...") for i, (content, title, doc_url) in enumerate(documents): success = await load_document_to_lightrag( - content, - title, - doc_url, - args.endpoint, - headers=auth_headers + content, title, doc_url, args.endpoint, headers=auth_headers ) if success: @@ -327,12 +318,14 @@ Examples: # Progress update if (i + 1) % 10 == 0: - print(f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)") + print( + f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)" + ) # Small delay to avoid overwhelming the service await asyncio.sleep(0.3) - print(f"\n✅ Loading complete!") + print("\n✅ Loading complete!") print(f"📊 Successful: {successful}") print(f"📊 Failed: {failed}") diff --git a/pyproject.toml b/pyproject.toml index 0bac3d88..70289ef3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,27 +34,6 @@ pytest-asyncio = "^0.25.3" pytest-cov = "^6.2.1" mypy = "^1.17.1" -[tool.ruff] -target-version = "py311" -lint.select = [ - "E", "F", "I", "C90", "UP", "B", "ASYNC", "N", "FBT", "A", "C4", "EM", "FA", "ICN", - "G", "PIE", "T20", "PYI", "PT", "RET", "PTH" -] -lint.ignore = [ - "A003", - "N818" -] - -[tool.ruff.lint.isort.sections] -ApoloSDK = ["apolo-sdk"] - -[tool.ruff.lint.isort] -combine-as-imports = true -lines-after-imports = 2 -section-order = ["future", "standard-library", "third-party", "ApoloSDK", "first-party", "local-folder"] -known-first-party = ["apolo_app_types"] -known-local-folder = ["tests"] - [tool.mypy] check_untyped_defs = true disallow_any_generics = true