LightRAG/examples/unofficial-sample/copy_llm_cache_to_another_storage.py

"""
Sometimes you need to switch a storage solution, but you want to save LLM token and time.
This handy script helps you to copy the LLM caches from one storage solution to another.
(Not all the storage impl are supported)
"""
import asyncio
import os

from dotenv import load_dotenv

from lightrag.kg.json_kv_impl import JsonKVStorage
from lightrag.kg.postgres_impl import PGKVStorage, PostgreSQLDB
from lightrag.namespace import NameSpace
from lightrag.utils import logger

load_dotenv()

ROOT_DIR = os.environ.get('ROOT_DIR')
WORKING_DIR = f'{ROOT_DIR}/dickens'

if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)

# AGE
os.environ['AGE_GRAPH_NAME'] = 'chinese'

postgres_db = PostgreSQLDB(
    config={
        'host': 'localhost',
        'port': 15432,
        'user': 'rag',
        'password': 'rag',
        'database': 'r2',
    }
)
async def copy_from_postgres_to_json():
    await postgres_db.initdb()

    from_llm_response_cache = PGKVStorage(
        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
        global_config={'embedding_batch_num': 6},
        embedding_func=None,
        db=postgres_db,
    )

    to_llm_response_cache = JsonKVStorage(
        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
        global_config={'working_dir': WORKING_DIR},
        embedding_func=None,
    )

    # Get all cache data using the new flattened structure
    all_data = await from_llm_response_cache.get_all()

    # Convert flattened data to the hierarchical structure used by JsonKVStorage
    kv = {}
    for flattened_key, cache_entry in all_data.items():
        # Parse the flattened key: {mode}:{cache_type}:{hash}
        parts = flattened_key.split(':', 2)
        if len(parts) == 3:
            mode, _cache_type, hash_value = parts
            if mode not in kv:
                kv[mode] = {}
            kv[mode][hash_value] = cache_entry
            logger.info(f'Copying {flattened_key} -> {mode}[{hash_value}]')
        else:
            logger.warning(f'Skipping invalid key format: {flattened_key}')

    await to_llm_response_cache.upsert(kv)
    await to_llm_response_cache.index_done_callback()
    logger.info('Mission accomplished!')
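
# Illustrative mapping (the mode and hash below are hypothetical values): a
# flattened Postgres key such as 'global:extract:abc123' ends up in the JSON
# store as kv['global']['abc123'] = cache_entry. Note the round trip is lossy:
# the hierarchical layout drops cache_type (parsed out of the key above and
# discarded), so copy_from_json_to_postgres below has to default it back to
# 'extract' when rebuilding the flattened keys.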
async def copy_from_json_to_postgres():
    await postgres_db.initdb()

    from_llm_response_cache = JsonKVStorage(
        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
        global_config={'working_dir': WORKING_DIR},
        embedding_func=None,
    )

    to_llm_response_cache = PGKVStorage(
        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
        global_config={'embedding_batch_num': 6},
        embedding_func=None,
        db=postgres_db,
    )

    # Get all cache data from JsonKVStorage (hierarchical structure)
    all_data = await from_llm_response_cache.get_all()

    # Convert hierarchical data to the flattened structure used by PGKVStorage
    flattened_data = {}
    for mode, mode_data in all_data.items():
        logger.info(f'Processing mode: {mode}')
        for hash_value, cache_entry in mode_data.items():
            # Determine cache_type from the cache entry, defaulting to 'extract'
            cache_type = cache_entry.get('cache_type', 'extract')
            # Build the flattened key: {mode}:{cache_type}:{hash}
            flattened_key = f'{mode}:{cache_type}:{hash_value}'
            flattened_data[flattened_key] = cache_entry
            logger.info(f'Converting {mode}[{hash_value}] -> {flattened_key}')

    # Upsert the flattened data
    await to_llm_response_cache.upsert(flattened_data)
    logger.info('Mission accomplished!')
if __name__ == '__main__':
    asyncio.run(copy_from_json_to_postgres())
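    # By default this copies JSON -> Postgres; to go the other way, swap in:
    # asyncio.run(copy_from_postgres_to_json())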