Format entire codebase with ruff and add type hints across all modules: - Apply ruff formatting to all Python files (121 files, 17K insertions) - Add type hints to function signatures throughout lightrag core and API - Update test suite with improved type annotations and docstrings - Add pyrightconfig.json for static type checking configuration - Create prompt_optimized.py and test_extraction_prompt_ab.py test files - Update ruff.toml and .gitignore for improved linting configuration - Standardize code style across examples, reproduce scripts, and utilities
74 lines
2.4 KiB
Python
74 lines
2.4 KiB
Python
import asyncio
|
|
|
|
import numpy as np
|
|
|
|
from lightrag.citation import CitationResult, extract_citations_from_response
|
|
|
|
|
|
# Mock embedding function
|
|
async def mock_embedding_func(texts: list[str]) -> list[list[float]]:
    """Return deterministic 3-dimensional keyword embeddings for ``texts``.

    Each axis flags one keyword, matched case-insensitively as a substring:
    axis 0 is 'sky', axis 1 is 'grass', axis 2 is 'water'. Vectors with at
    least one hit are scaled to unit length; a text containing none of the
    keywords maps to the zero vector.

    Args:
        texts: Strings to embed.

    Returns:
        One list of three plain Python floats per input text, in input order.
    """
    keywords = ('sky', 'grass', 'water')
    embeddings: list[list[float]] = []
    for text in texts:
        lowered = text.lower()
        vec = [1.0 if keyword in lowered else 0.0 for keyword in keywords]

        # Normalize. float() converts the NumPy scalar so every component
        # stays a built-in float, matching the declared return type.
        norm = float(np.linalg.norm(vec))
        if norm > 0:
            vec = [v / norm for v in vec]
        embeddings.append(vec)
    return embeddings
|
|
|
|
|
|
async def main():
    """Run citation extraction on a tiny hand-built example and print the outcome.

    Builds three chunks and three references, feeds a four-sentence mock
    response through ``extract_citations_from_response`` using
    ``mock_embedding_func``, then prints the original and annotated
    responses, each citation, the footnotes, and the uncited claims.
    """
    # 1. Knowledge base: (chunk id, content, source file).
    chunk_rows = (
        ('chunk_1', 'The sky is usually blue during the day.', 'nature.txt'),
        ('chunk_2', 'The grass is green because of chlorophyll.', 'biology.txt'),
        ('chunk_3', 'Water is wet and essential for life.', 'chemistry.txt'),
    )
    chunks = [
        {'id': chunk_id, 'content': content, 'file_path': file_path}
        for chunk_id, content, file_path in chunk_rows
    ]

    # 2. References: map each source file to a reference id.
    reference_rows = (
        ('1', 'nature.txt'),
        ('2', 'biology.txt'),
        ('3', 'chemistry.txt'),
    )
    references = [
        {'reference_id': reference_id, 'file_path': file_path}
        for reference_id, file_path in reference_rows
    ]

    # 3. Mock LLM response, one sentence per case:
    #    sentence 1 mentions 'sky'   (chunk 1)
    #    sentence 2 mentions 'grass' (chunk 2)
    #    sentence 3 mentions 'water' (chunk 3)
    #    sentence 4 mentions none of the mock embedding's keywords
    response = 'The sky is blue. The grass is green. Water is wet. Computers are silicon.'

    print('--- Running Citation Extraction ---')
    result: CitationResult = await extract_citations_from_response(
        response=response,
        chunks=chunks,
        references=references,
        embedding_func=mock_embedding_func,
        min_similarity=0.5,
    )

    print(f'Original Response: {result.original_response}')
    print(f'Annotated Response: {result.annotated_response}')

    print('\nCitations Found:')
    for citation in result.citations:
        print(f" - Text: '{citation.text}'")
        print(f' Refs: {citation.reference_ids} (Conf: {citation.confidence:.2f})')
        print(f' Pos: {citation.start_char}-{citation.end_char}')

    print('\nFootnotes:')
    for footnote in result.footnotes:
        print(f' {footnote}')

    print('\nUncited Claims:')
    for uncited in result.uncited_claims:
        print(f" '{uncited}'")
|
|
|
|
|
|
# Run the async demo only when this file is executed as a script.
if __name__ == '__main__':
    asyncio.run(main())
|