Format entire codebase with ruff and add type hints across all modules: - Apply ruff formatting to all Python files (121 files, 17K insertions) - Add type hints to function signatures throughout lightrag core and API - Update test suite with improved type annotations and docstrings - Add pyrightconfig.json for static type checking configuration - Create prompt_optimized.py and test_extraction_prompt_ab.py test files - Update ruff.toml and .gitignore for improved linting configuration - Standardize code style across examples, reproduce scripts, and utilities
117 lines
3.6 KiB
Python
117 lines
3.6 KiB
Python
import os
|
|
|
|
from lightrag import LightRAG
|
|
from lightrag.llm.openai import gpt_4o_mini_complete
|
|
|
|
#########
|
|
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
|
|
# import nest_asyncio
|
|
# nest_asyncio.apply()
|
|
#########
|
|
|
|
# Directory handed to LightRAG as its `working_dir` for this example.
WORKING_DIR = './custom_kg'

# Create the directory up front. `exist_ok=True` makes this idempotent and
# avoids the check-then-create race of `os.path.exists()` + `os.mkdir()`;
# `makedirs` also copes with a nested path should WORKING_DIR be changed.
os.makedirs(WORKING_DIR, exist_ok=True)
|
|
|
|
# Build the LightRAG instance used by the rest of this script.
# gpt_4o_mini_complete is the LLM callable; a stronger model such as
# gpt_4o_complete (it would need importing) can be passed as llm_model_func
# instead.
rag = LightRAG(llm_model_func=gpt_4o_mini_complete, working_dir=WORKING_DIR)
|
|
|
|
# Hand-written knowledge graph passed to `rag.insert_custom_kg()`.
#
# Three top-level sections:
#   * 'entities'      - graph nodes (name, type, description, source_id)
#   * 'relationships' - edges between entity names (src_id -> tgt_id) with a
#                       description, keywords, weight and source_id
#   * 'chunks'        - text passages, tied to entities/relationships through
#                       the shared 'source_id' values
custom_kg = {
    'entities': [
        {
            'entity_name': 'CompanyA',
            'entity_type': 'Organization',
            'description': 'A major technology company',
            'source_id': 'Source1',
        },
        {
            'entity_name': 'ProductX',
            'entity_type': 'Product',
            'description': 'A popular product developed by CompanyA',
            'source_id': 'Source1',
        },
        {
            'entity_name': 'PersonA',
            'entity_type': 'Person',
            'description': 'A renowned researcher in AI',
            'source_id': 'Source2',
        },
        {
            'entity_name': 'UniversityB',
            'entity_type': 'Organization',
            'description': 'A leading university specializing in technology and sciences',
            'source_id': 'Source2',
        },
        {
            'entity_name': 'CityC',
            'entity_type': 'Location',
            'description': 'A large metropolitan city known for its culture and economy',
            'source_id': 'Source3',
        },
        {
            'entity_name': 'EventY',
            'entity_type': 'Event',
            'description': 'An annual technology conference held in CityC',
            'source_id': 'Source3',
        },
    ],
    'relationships': [
        {
            'src_id': 'CompanyA',
            'tgt_id': 'ProductX',
            'description': 'CompanyA develops ProductX',
            'keywords': 'develop, produce',
            'weight': 1.0,
            'source_id': 'Source1',
        },
        {
            'src_id': 'PersonA',
            'tgt_id': 'UniversityB',
            'description': 'PersonA works at UniversityB',
            'keywords': 'employment, affiliation',
            'weight': 0.9,
            'source_id': 'Source2',
        },
        {
            'src_id': 'CityC',
            'tgt_id': 'EventY',
            'description': 'EventY is hosted in CityC',
            'keywords': 'host, location',
            'weight': 0.8,
            'source_id': 'Source3',
        },
    ],
    # Every chunk now uses the same 'chunk_order_index' key; previously four
    # records used 'source_chunk_index' and one used 'chunk_order_index'.
    # NOTE(review): 'chunk_order_index' is believed to be the key LightRAG's
    # custom-KG insertion reads - confirm against the installed version.
    'chunks': [
        {
            'content': 'ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.',
            'source_id': 'Source1',
            'chunk_order_index': 0,
        },
        {
            'content': 'One outstanding feature of ProductX is its advanced AI capabilities.',
            'source_id': 'Source1',
            'chunk_order_index': 1,
        },
        {
            'content': 'PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.',
            'source_id': 'Source2',
            'chunk_order_index': 0,
        },
        {
            'content': 'EventY, held in CityC, attracts technology enthusiasts and companies from around the globe.',
            'source_id': 'Source3',
            'chunk_order_index': 0,
        },
        # Presumably a placeholder chunk for items without a known source;
        # the literal string 'None' is kept exactly as in the original.
        {
            'content': 'None',
            'source_id': 'UNKNOWN',
            'chunk_order_index': 0,
        },
    ],
}
|
|
|
|
# Insert the hand-built entities, relationships and chunks into the RAG instance.
# NOTE(review): some LightRAG versions may require storages to be initialised
# before inserting - confirm against the installed version.
rag.insert_custom_kg(custom_kg)
|