LightRAG/reproduce_citation.py
clssck 69358d830d test(lightrag,examples,api): comprehensive ruff formatting and type hints
Format entire codebase with ruff and add type hints across all modules:
- Apply ruff formatting to all Python files (121 files, 17K insertions)
- Add type hints to function signatures throughout lightrag core and API
- Update test suite with improved type annotations and docstrings
- Add pyrightconfig.json for static type checking configuration
- Create prompt_optimized.py and test_extraction_prompt_ab.py test files
- Update ruff.toml and .gitignore for improved linting configuration
- Standardize code style across examples, reproduce scripts, and utilities
2025-12-05 15:17:06 +01:00

74 lines
2.4 KiB
Python

import asyncio
import numpy as np
from lightrag.citation import CitationResult, extract_citations_from_response
# Mock embedding function
async def mock_embedding_func(texts: list[str]) -> list[list[float]]:
    """Return a deterministic 3-d keyword embedding for each input text.

    Each axis flags the case-insensitive presence of one keyword as a
    substring of the text, in the order ``sky``, ``grass``, ``water``.
    Non-zero vectors are scaled to unit length; a text containing none of
    the keywords maps to the zero vector.

    Args:
        texts: Strings to embed.

    Returns:
        One 3-element vector per input text, in input order.
    """
    keywords = ('sky', 'grass', 'water')
    vectors = []
    for raw in texts:
        lowered = raw.lower()
        axes = [1.0 if word in lowered else 0.0 for word in keywords]
        length = np.linalg.norm(axes)
        # The all-zero vector is kept as-is so we never divide by zero.
        vectors.append([a / length for a in axes] if length > 0 else axes)
    return vectors
async def main():
    """Run the citation-extraction demo and print its outcome.

    Builds three in-memory chunks, a matching reference list and a canned
    four-sentence answer, passes them to ``extract_citations_from_response``
    together with ``mock_embedding_func``, then prints the fields of the
    returned ``CitationResult``.
    """
    # Canned LLM answer. With the keyword-based mock embedding the intent is:
    #   sentence 1 -> "sky"   (same keyword as chunk_1)
    #   sentence 2 -> "grass" (same keyword as chunk_2)
    #   sentence 3 -> "water" (same keyword as chunk_3)
    #   sentence 4 -> no keyword at all, so it is expected to stay uncited
    llm_answer = 'The sky is blue. The grass is green. Water is wet. Computers are silicon.'

    # Knowledge base: one chunk per source file.
    knowledge_base = [
        {'id': 'chunk_1', 'content': 'The sky is usually blue during the day.', 'file_path': 'nature.txt'},
        {'id': 'chunk_2', 'content': 'The grass is green because of chlorophyll.', 'file_path': 'biology.txt'},
        {'id': 'chunk_3', 'content': 'Water is wet and essential for life.', 'file_path': 'chemistry.txt'},
    ]

    # Reference table: associates each source file with a reference id.
    reference_table = [
        {'reference_id': '1', 'file_path': 'nature.txt'},
        {'reference_id': '2', 'file_path': 'biology.txt'},
        {'reference_id': '3', 'file_path': 'chemistry.txt'},
    ]

    print('--- Running Citation Extraction ---')
    outcome: CitationResult = await extract_citations_from_response(
        response=llm_answer,
        chunks=knowledge_base,
        references=reference_table,
        embedding_func=mock_embedding_func,
        min_similarity=0.5,
    )

    print(f'Original Response: {outcome.original_response}')
    print(f'Annotated Response: {outcome.annotated_response}')

    print('\nCitations Found:')
    for citation in outcome.citations:
        print(f" - Text: '{citation.text}'")
        print(f' Refs: {citation.reference_ids} (Conf: {citation.confidence:.2f})')
        print(f' Pos: {citation.start_char}-{citation.end_char}')

    print('\nFootnotes:')
    for footnote in outcome.footnotes:
        print(f' {footnote}')

    print('\nUncited Claims:')
    for uncited in outcome.uncited_claims:
        print(f" '{uncited}'")
if __name__ == '__main__':
    # Only run the demo when executed as a script, not when imported.
    demo = main()
    asyncio.run(demo)