Enhance documentation loader and build scripts

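Refactors the documentation loading script for readability: sorts imports, modernizes type hints (`dict[str, str] | None`, `list[tuple]`), replaces bare `except:` clauses with `except Exception:`, and flattens redundant `else` branches after early returns. Reformats the CLI argument definitions and drops needless f-string prefixes from output messages.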

Replaces the single-target schema generation Makefile with a more robust one that adds `all`, `clean`, and `test` targets, and adds `all` and placeholder `test` targets to the Helm Makefile for consistency.

Removes the obsolete Ruff lint and isort configuration (`[tool.ruff]` sections) to streamline the tooling setup.

These changes improve maintainability and usability of schema generation and documentation loading workflows.

Relates to MLO-469
This commit is contained in:
Taddeus 2025-11-03 14:21:17 +02:00
parent c11e91c614
commit 81dff560f1
5 changed files with 63 additions and 77 deletions

View file

@ -1,3 +1,14 @@
.PHONY: gen-types-schemas
.PHONY: all clean test gen-types-schemas
all: gen-types-schemas
gen-types-schemas:
@.apolo/scripts/gen_types_schemas.sh
clean:
@rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppInputs.json
@rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppOutputs.json
@rm -f .apolo/src/apolo_apps_lightrag/types.py
test:
@echo "No tests defined for schema generation."

View file

@ -1,5 +1,3 @@
import pytest
pytest_plugins = [
"apolo_app_types_fixtures.apolo_clients",
"apolo_app_types_fixtures.constants",

View file

@ -13,7 +13,9 @@ CHART_PACKAGE := $(CHART_PACKAGE_DIR)/$(CHART_NAME)-$(CHART_VERSION).tgz
GITHUB_USERNAME := $(shell echo "$$APOLO_GITHUB_TOKEN" | base64 -d 2>/dev/null | cut -d: -f1 2>/dev/null || echo "oauth2")
.PHONY: help helm-package helm-push clean
.PHONY: all help helm-package helm-push clean test
all: help
help:
@echo "Available targets:"
@ -51,3 +53,6 @@ clean:
@echo "Removing packaged charts..."
rm -rf $(CHART_PACKAGE_DIR)
@echo "✅ Cleaned"
test:
@echo "No automated tests for Helm packaging. Use 'helm test' as needed."

View file

@ -4,13 +4,13 @@ Simplified script to load documentation into LightRAG
Loads all markdown files from a directory structure
"""
import asyncio
import httpx
import argparse
import sys
import asyncio
import os
import sys
from pathlib import Path
from typing import Dict, List, Optional
import httpx
async def load_document_to_lightrag(
@ -18,7 +18,7 @@ async def load_document_to_lightrag(
title: str,
doc_url: str,
endpoint: str = "http://localhost:9621",
headers: Optional[Dict[str, str]] = None
headers: dict[str, str] | None = None,
) -> bool:
"""Load a single document to LightRAG with URL reference"""
try:
@ -29,24 +29,20 @@ async def load_document_to_lightrag(
response = await client.post(
f"{endpoint}/documents/text",
headers=request_headers,
json={
"text": content,
"file_source": doc_url
}
json={"text": content, "file_source": doc_url},
)
if response.status_code == 200:
print(f"✅ Loaded: {title}")
return True
else:
print(f"❌ Failed to load {title}: {response.status_code}")
if response.status_code == 500:
try:
error_detail = response.json()
print(f" Error details: {error_detail}")
except:
print(f" Response: {response.text}")
return False
print(f"❌ Failed to load {title}: {response.status_code}")
if response.status_code == 500:
try:
error_detail = response.json()
print(f" Error details: {error_detail}")
except Exception:
print(f" Response: {response.text}")
return False
except Exception as e:
print(f"❌ Error loading {title}: {e}")
@ -56,12 +52,12 @@ async def load_document_to_lightrag(
def convert_file_path_to_url(relative_path: str, base_url: str) -> str:
"""Convert file path to documentation URL"""
# Ensure base URL ends with /
if not base_url.endswith('/'):
base_url += '/'
if not base_url.endswith("/"):
base_url += "/"
# Handle special cases
if relative_path in ["README.md", "SUMMARY.md"]:
return base_url.rstrip('/')
return base_url.rstrip("/")
# Remove .md extension and convert path
url_path = relative_path.replace(".md", "")
@ -76,7 +72,9 @@ def convert_file_path_to_url(relative_path: str, base_url: str) -> str:
return f"{base_url}{url_path}"
def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = None) -> List[tuple]:
def load_markdown_files(
docs_path: Path, mode: str = "files", base_url: str = None
) -> list[tuple]:
"""Load all markdown files from directory structure
Args:
@ -102,7 +100,7 @@ def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = No
for file_path in md_files:
try:
# Load content
with open(file_path, 'r', encoding='utf-8') as f:
with open(file_path, encoding="utf-8") as f:
content = f.read().strip()
if not content:
@ -133,7 +131,7 @@ Source: {source_info}
else: # urls mode
# Convert file path to documentation URL
reference = convert_file_path_to_url(relative_path, base_url)
source_info = f"Documentation Site"
source_info = "Documentation Site"
# Prepare content with URL metadata
content_with_metadata = f"""
@ -154,8 +152,7 @@ Source: {source_info}
async def test_lightrag_health(
endpoint: str = "http://localhost:9621",
headers: Optional[Dict[str, str]] = None
endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None
) -> bool:
"""Test if LightRAG is accessible"""
try:
@ -165,20 +162,18 @@ async def test_lightrag_health(
health_data = response.json()
print(f"✅ LightRAG is healthy: {health_data.get('status')}")
return True
else:
print(f"❌ LightRAG health check failed: {response.status_code}")
return False
print(f"❌ LightRAG health check failed: {response.status_code}")
return False
except Exception as e:
print(f"❌ Cannot connect to LightRAG: {e}")
return False
async def test_query(
endpoint: str = "http://localhost:9621",
headers: Optional[Dict[str, str]] = None
endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None
) -> None:
"""Test a sample query"""
print(f"\n🧪 Testing query...")
print("\n🧪 Testing query...")
try:
async with httpx.AsyncClient(timeout=30.0) as client:
request_headers = {"Content-Type": "application/json"}
@ -187,12 +182,12 @@ async def test_query(
response = await client.post(
f"{endpoint}/query",
headers=request_headers,
json={"query": "What is this documentation about?", "mode": "local"}
json={"query": "What is this documentation about?", "mode": "local"},
)
if response.status_code == 200:
result = response.json()
print(f"✅ Query successful!")
print("✅ Query successful!")
print(f"Response: {result['response'][:200]}...")
else:
print(f"❌ Query failed: {response.status_code}")
@ -200,7 +195,7 @@ async def test_query(
try:
error_detail = response.json()
print(f" Error details: {error_detail}")
except:
except Exception:
print(f" Response: {response.text}")
except Exception as e:
@ -229,35 +224,33 @@ Examples:
# Load with different documentation base URL
python load_docs.py docs/ --mode urls --base-url https://my-docs.example.com/docs/
"""
""",
)
parser.add_argument(
"docs_path",
nargs="?",
default="../apolo-copilot/docs/official-apolo-documentation/docs",
help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)"
help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)",
)
parser.add_argument(
"--mode",
choices=["files", "urls"],
default="files",
help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)"
help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)",
)
parser.add_argument(
"--base-url",
dest="base_url",
help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/"
help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/",
)
parser.add_argument(
"--endpoint",
default="http://localhost:9621",
help="LightRAG endpoint URL (default: http://localhost:9621)"
help="LightRAG endpoint URL (default: http://localhost:9621)",
)
parser.add_argument(
"--no-test",
action="store_true",
help="Skip test query after loading"
"--no-test", action="store_true", help="Skip test query after loading"
)
args = parser.parse_args()
@ -283,7 +276,9 @@ Examples:
# Test LightRAG connectivity
if not await test_lightrag_health(args.endpoint, headers=auth_headers):
print("❌ Cannot connect to LightRAG. Please ensure it's running and accessible.")
print(
"❌ Cannot connect to LightRAG. Please ensure it's running and accessible."
)
sys.exit(1)
# Load documents
@ -309,15 +304,11 @@ Examples:
successful = 0
failed = 0
print(f"\n🔄 Starting to load documents...")
print("\n🔄 Starting to load documents...")
for i, (content, title, doc_url) in enumerate(documents):
success = await load_document_to_lightrag(
content,
title,
doc_url,
args.endpoint,
headers=auth_headers
content, title, doc_url, args.endpoint, headers=auth_headers
)
if success:
@ -327,12 +318,14 @@ Examples:
# Progress update
if (i + 1) % 10 == 0:
print(f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)")
print(
f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)"
)
# Small delay to avoid overwhelming the service
await asyncio.sleep(0.3)
print(f"\n✅ Loading complete!")
print("\n✅ Loading complete!")
print(f"📊 Successful: {successful}")
print(f"📊 Failed: {failed}")

View file

@ -34,27 +34,6 @@ pytest-asyncio = "^0.25.3"
pytest-cov = "^6.2.1"
mypy = "^1.17.1"
[tool.ruff]
target-version = "py311"
lint.select = [
"E", "F", "I", "C90", "UP", "B", "ASYNC", "N", "FBT", "A", "C4", "EM", "FA", "ICN",
"G", "PIE", "T20", "PYI", "PT", "RET", "PTH"
]
lint.ignore = [
"A003",
"N818"
]
[tool.ruff.lint.isort.sections]
ApoloSDK = ["apolo-sdk"]
[tool.ruff.lint.isort]
combine-as-imports = true
lines-after-imports = 2
section-order = ["future", "standard-library", "third-party", "ApoloSDK", "first-party", "local-folder"]
known-first-party = ["apolo_app_types"]
known-local-folder = ["tests"]
[tool.mypy]
check_untyped_defs = true
disallow_any_generics = true