Enhance documentation loader and build scripts
Refactors the documentation loading script for improved readability, type hinting, and error handling. Updates CLI argument parsing and output formatting for clarity. Replaces a simple Makefile target with a more robust schema-generation Makefile that includes clean and test targets, and adds a placeholder test target to the Helm build system for consistency. Removes obsolete lint configuration to streamline the tooling setup. These changes improve the maintainability and usability of the schema generation and documentation loading workflows. Relates to MLO-469.
This commit is contained in:
parent
c11e91c614
commit
81dff560f1
5 changed files with 63 additions and 77 deletions
|
|
@ -1,3 +1,14 @@
|
|||
.PHONY: gen-types-schemas
|
||||
.PHONY: all clean test gen-types-schemas
|
||||
|
||||
all: gen-types-schemas
|
||||
|
||||
gen-types-schemas:
|
||||
@.apolo/scripts/gen_types_schemas.sh
|
||||
|
||||
clean:
|
||||
@rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppInputs.json
|
||||
@rm -f .apolo/src/apolo_apps_lightrag/schemas/LightRAGAppOutputs.json
|
||||
@rm -f .apolo/src/apolo_apps_lightrag/types.py
|
||||
|
||||
test:
|
||||
@echo "No tests defined for schema generation."
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
import pytest
|
||||
|
||||
pytest_plugins = [
|
||||
"apolo_app_types_fixtures.apolo_clients",
|
||||
"apolo_app_types_fixtures.constants",
|
||||
|
|
|
|||
7
Makefile
7
Makefile
|
|
@ -13,7 +13,9 @@ CHART_PACKAGE := $(CHART_PACKAGE_DIR)/$(CHART_NAME)-$(CHART_VERSION).tgz
|
|||
|
||||
GITHUB_USERNAME := $(shell echo "$$APOLO_GITHUB_TOKEN" | base64 -d 2>/dev/null | cut -d: -f1 2>/dev/null || echo "oauth2")
|
||||
|
||||
.PHONY: help helm-package helm-push clean
|
||||
.PHONY: all help helm-package helm-push clean test
|
||||
|
||||
all: help
|
||||
|
||||
help:
|
||||
@echo "Available targets:"
|
||||
|
|
@ -51,3 +53,6 @@ clean:
|
|||
@echo "Removing packaged charts..."
|
||||
rm -rf $(CHART_PACKAGE_DIR)
|
||||
@echo "✅ Cleaned"
|
||||
|
||||
test:
|
||||
@echo "No automated tests for Helm packaging. Use 'helm test' as needed."
|
||||
|
|
|
|||
97
load_docs.py
97
load_docs.py
|
|
@ -4,13 +4,13 @@ Simplified script to load documentation into LightRAG
|
|||
Loads all markdown files from a directory structure
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import argparse
|
||||
import sys
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
async def load_document_to_lightrag(
|
||||
|
|
@ -18,7 +18,7 @@ async def load_document_to_lightrag(
|
|||
title: str,
|
||||
doc_url: str,
|
||||
endpoint: str = "http://localhost:9621",
|
||||
headers: Optional[Dict[str, str]] = None
|
||||
headers: dict[str, str] | None = None,
|
||||
) -> bool:
|
||||
"""Load a single document to LightRAG with URL reference"""
|
||||
try:
|
||||
|
|
@ -29,24 +29,20 @@ async def load_document_to_lightrag(
|
|||
response = await client.post(
|
||||
f"{endpoint}/documents/text",
|
||||
headers=request_headers,
|
||||
json={
|
||||
"text": content,
|
||||
"file_source": doc_url
|
||||
}
|
||||
json={"text": content, "file_source": doc_url},
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
print(f"✅ Loaded: {title}")
|
||||
return True
|
||||
else:
|
||||
print(f"❌ Failed to load {title}: {response.status_code}")
|
||||
if response.status_code == 500:
|
||||
try:
|
||||
error_detail = response.json()
|
||||
print(f" Error details: {error_detail}")
|
||||
except:
|
||||
print(f" Response: {response.text}")
|
||||
return False
|
||||
print(f"❌ Failed to load {title}: {response.status_code}")
|
||||
if response.status_code == 500:
|
||||
try:
|
||||
error_detail = response.json()
|
||||
print(f" Error details: {error_detail}")
|
||||
except Exception:
|
||||
print(f" Response: {response.text}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error loading {title}: {e}")
|
||||
|
|
@ -56,12 +52,12 @@ async def load_document_to_lightrag(
|
|||
def convert_file_path_to_url(relative_path: str, base_url: str) -> str:
|
||||
"""Convert file path to documentation URL"""
|
||||
# Ensure base URL ends with /
|
||||
if not base_url.endswith('/'):
|
||||
base_url += '/'
|
||||
if not base_url.endswith("/"):
|
||||
base_url += "/"
|
||||
|
||||
# Handle special cases
|
||||
if relative_path in ["README.md", "SUMMARY.md"]:
|
||||
return base_url.rstrip('/')
|
||||
return base_url.rstrip("/")
|
||||
|
||||
# Remove .md extension and convert path
|
||||
url_path = relative_path.replace(".md", "")
|
||||
|
|
@ -76,7 +72,9 @@ def convert_file_path_to_url(relative_path: str, base_url: str) -> str:
|
|||
return f"{base_url}{url_path}"
|
||||
|
||||
|
||||
def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = None) -> List[tuple]:
|
||||
def load_markdown_files(
|
||||
docs_path: Path, mode: str = "files", base_url: str = None
|
||||
) -> list[tuple]:
|
||||
"""Load all markdown files from directory structure
|
||||
|
||||
Args:
|
||||
|
|
@ -102,7 +100,7 @@ def load_markdown_files(docs_path: Path, mode: str = "files", base_url: str = No
|
|||
for file_path in md_files:
|
||||
try:
|
||||
# Load content
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
with open(file_path, encoding="utf-8") as f:
|
||||
content = f.read().strip()
|
||||
|
||||
if not content:
|
||||
|
|
@ -133,7 +131,7 @@ Source: {source_info}
|
|||
else: # urls mode
|
||||
# Convert file path to documentation URL
|
||||
reference = convert_file_path_to_url(relative_path, base_url)
|
||||
source_info = f"Documentation Site"
|
||||
source_info = "Documentation Site"
|
||||
|
||||
# Prepare content with URL metadata
|
||||
content_with_metadata = f"""
|
||||
|
|
@ -154,8 +152,7 @@ Source: {source_info}
|
|||
|
||||
|
||||
async def test_lightrag_health(
|
||||
endpoint: str = "http://localhost:9621",
|
||||
headers: Optional[Dict[str, str]] = None
|
||||
endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None
|
||||
) -> bool:
|
||||
"""Test if LightRAG is accessible"""
|
||||
try:
|
||||
|
|
@ -165,20 +162,18 @@ async def test_lightrag_health(
|
|||
health_data = response.json()
|
||||
print(f"✅ LightRAG is healthy: {health_data.get('status')}")
|
||||
return True
|
||||
else:
|
||||
print(f"❌ LightRAG health check failed: {response.status_code}")
|
||||
return False
|
||||
print(f"❌ LightRAG health check failed: {response.status_code}")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"❌ Cannot connect to LightRAG: {e}")
|
||||
return False
|
||||
|
||||
|
||||
async def test_query(
|
||||
endpoint: str = "http://localhost:9621",
|
||||
headers: Optional[Dict[str, str]] = None
|
||||
endpoint: str = "http://localhost:9621", headers: dict[str, str] | None = None
|
||||
) -> None:
|
||||
"""Test a sample query"""
|
||||
print(f"\n🧪 Testing query...")
|
||||
print("\n🧪 Testing query...")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
request_headers = {"Content-Type": "application/json"}
|
||||
|
|
@ -187,12 +182,12 @@ async def test_query(
|
|||
response = await client.post(
|
||||
f"{endpoint}/query",
|
||||
headers=request_headers,
|
||||
json={"query": "What is this documentation about?", "mode": "local"}
|
||||
json={"query": "What is this documentation about?", "mode": "local"},
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
print(f"✅ Query successful!")
|
||||
print("✅ Query successful!")
|
||||
print(f"Response: {result['response'][:200]}...")
|
||||
else:
|
||||
print(f"❌ Query failed: {response.status_code}")
|
||||
|
|
@ -200,7 +195,7 @@ async def test_query(
|
|||
try:
|
||||
error_detail = response.json()
|
||||
print(f" Error details: {error_detail}")
|
||||
except:
|
||||
except Exception:
|
||||
print(f" Response: {response.text}")
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -229,35 +224,33 @@ Examples:
|
|||
|
||||
# Load with different documentation base URL
|
||||
python load_docs.py docs/ --mode urls --base-url https://my-docs.example.com/docs/
|
||||
"""
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"docs_path",
|
||||
nargs="?",
|
||||
default="../apolo-copilot/docs/official-apolo-documentation/docs",
|
||||
help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)"
|
||||
help="Path to documentation directory (default: ../apolo-copilot/docs/official-apolo-documentation/docs)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mode",
|
||||
choices=["files", "urls"],
|
||||
default="files",
|
||||
help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)"
|
||||
help="Reference mode: 'files' for file paths, 'urls' for URL references (default: files)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base-url",
|
||||
dest="base_url",
|
||||
help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/"
|
||||
help="Base URL for documentation site (required when mode=urls). Example: https://docs.apolo.us/index/",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--endpoint",
|
||||
default="http://localhost:9621",
|
||||
help="LightRAG endpoint URL (default: http://localhost:9621)"
|
||||
help="LightRAG endpoint URL (default: http://localhost:9621)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-test",
|
||||
action="store_true",
|
||||
help="Skip test query after loading"
|
||||
"--no-test", action="store_true", help="Skip test query after loading"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
|
@ -283,7 +276,9 @@ Examples:
|
|||
|
||||
# Test LightRAG connectivity
|
||||
if not await test_lightrag_health(args.endpoint, headers=auth_headers):
|
||||
print("❌ Cannot connect to LightRAG. Please ensure it's running and accessible.")
|
||||
print(
|
||||
"❌ Cannot connect to LightRAG. Please ensure it's running and accessible."
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# Load documents
|
||||
|
|
@ -309,15 +304,11 @@ Examples:
|
|||
successful = 0
|
||||
failed = 0
|
||||
|
||||
print(f"\n🔄 Starting to load documents...")
|
||||
print("\n🔄 Starting to load documents...")
|
||||
|
||||
for i, (content, title, doc_url) in enumerate(documents):
|
||||
success = await load_document_to_lightrag(
|
||||
content,
|
||||
title,
|
||||
doc_url,
|
||||
args.endpoint,
|
||||
headers=auth_headers
|
||||
content, title, doc_url, args.endpoint, headers=auth_headers
|
||||
)
|
||||
|
||||
if success:
|
||||
|
|
@ -327,12 +318,14 @@ Examples:
|
|||
|
||||
# Progress update
|
||||
if (i + 1) % 10 == 0:
|
||||
print(f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)")
|
||||
print(
|
||||
f"📈 Progress: {i + 1}/{len(documents)} ({successful} success, {failed} failed)"
|
||||
)
|
||||
|
||||
# Small delay to avoid overwhelming the service
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
print(f"\n✅ Loading complete!")
|
||||
print("\n✅ Loading complete!")
|
||||
print(f"📊 Successful: {successful}")
|
||||
print(f"📊 Failed: {failed}")
|
||||
|
||||
|
|
|
|||
|
|
@ -34,27 +34,6 @@ pytest-asyncio = "^0.25.3"
|
|||
pytest-cov = "^6.2.1"
|
||||
mypy = "^1.17.1"
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py311"
|
||||
lint.select = [
|
||||
"E", "F", "I", "C90", "UP", "B", "ASYNC", "N", "FBT", "A", "C4", "EM", "FA", "ICN",
|
||||
"G", "PIE", "T20", "PYI", "PT", "RET", "PTH"
|
||||
]
|
||||
lint.ignore = [
|
||||
"A003",
|
||||
"N818"
|
||||
]
|
||||
|
||||
[tool.ruff.lint.isort.sections]
|
||||
ApoloSDK = ["apolo-sdk"]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
combine-as-imports = true
|
||||
lines-after-imports = 2
|
||||
section-order = ["future", "standard-library", "third-party", "ApoloSDK", "first-party", "local-folder"]
|
||||
known-first-party = ["apolo_app_types"]
|
||||
known-local-folder = ["tests"]
|
||||
|
||||
[tool.mypy]
|
||||
check_untyped_defs = true
|
||||
disallow_any_generics = true
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue