From c0df9447e8e523bd1bbcc55a6e7433eeeaa64266 Mon Sep 17 00:00:00 2001 From: Hammton Date: Tue, 30 Sep 2025 22:24:01 +0300 Subject: [PATCH] Update lightrag_azure_openai_demo.py --- examples/lightrag_azure_openai_demo.py | 216 ++++++++++++++++++++----- 1 file changed, 178 insertions(+), 38 deletions(-) diff --git a/examples/lightrag_azure_openai_demo.py b/examples/lightrag_azure_openai_demo.py index c101383d..2d6fd560 100644 --- a/examples/lightrag_azure_openai_demo.py +++ b/examples/lightrag_azure_openai_demo.py @@ -1,3 +1,4 @@ +==========controled ingestion in batches============= import os import asyncio from lightrag import LightRAG, QueryParam @@ -6,8 +7,8 @@ import numpy as np from dotenv import load_dotenv import logging from openai import AzureOpenAI -from lightrag.kg.shared_storage import initialize_pipeline_status - +import time +start_time = time.time() logging.basicConfig(level=logging.INFO) load_dotenv() @@ -20,14 +21,15 @@ AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT") AZURE_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_EMBEDDING_DEPLOYMENT") AZURE_EMBEDDING_API_VERSION = os.getenv("AZURE_EMBEDDING_API_VERSION") -WORKING_DIR = "./dickens" +WORKING_DIR = "C:\\Users\\user\\testfolder" -if os.path.exists(WORKING_DIR): - import shutil +# The code below removes the working_dir and creates a new one! +# if os.path.exists(WORKING_DIR): +# import shutil - shutil.rmtree(WORKING_DIR) +# shutil.rmtree(WORKING_DIR) -os.mkdir(WORKING_DIR) +# os.mkdir(WORKING_DIR) async def llm_model_func( @@ -70,19 +72,149 @@ async def embedding_func(texts: list[str]) -> np.ndarray: async def test_funcs(): result = await llm_model_func("How are you?") - print("Resposta do llm_model_func: ", result) + print("Response from llm_model_func: ", result) result = await embedding_func(["How are you?"]) - print("Resultado do embedding_func: ", result.shape) - print("Dimensão da embedding: ", result.shape[1]) + print("Result from embedding_func: ", result.shape) + print("Embedding dimension: ", result.shape[1]) asyncio.run(test_funcs()) -embedding_dimension = 3072 +embedding_dimension = 1536 + +rag = LightRAG( + working_dir=WORKING_DIR, + addon_params={"insert_batch_size": 3}, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=embedding_dimension, + max_token_size=8192, + func=embedding_func, + ), +) + +folder_path = 'C:/Users/example/test/LightRAG/my_docs' # With os, this specification of the documents folder is not a problem. + +def normalize_path(path): + # Normalize the path + normalized_path = os.path.normpath(path) + # Replace backslashes with forward slashes + return normalized_path.replace('\\', '/') + +# Output file where we store filenames +output_file = 'processed.txt' +# The maximum number of files to process +batch_files = 5 + +# Function to include document in vector-store +def process_doc(file_path): + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + return content + +# logging.info(f"Adding document: {file_path}") +# rag.insert(content) + +# Function to read existing filenames from the output file +def read_existing_files(output_file): + if os.path.exists(output_file): + with open(output_file, 'r') as f: + return set(line.strip() for line in f.readlines()) + return set() + +input_docs = [] +# Check if the folder exists +if os.path.exists(folder_path) and os.path.isdir(folder_path): + # Read existing filenames from the output file + existing_files = read_existing_files(output_file) + # Open the output -async def initialize_rag(): +==========Querier============================================== + +import os +import asyncio +from lightrag import LightRAG, QueryParam +from lightrag.utils import EmbeddingFunc +import numpy as np +from dotenv import load_dotenv +import logging +from openai import AzureOpenAI +import time +from multiprocessing import freeze_support + +# Configure logging +logging.basicConfig(level=logging.INFO) + +def main(): + start_time = time.time() + load_dotenv() + + # Version of the script + VERSION = "0.2" + + AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION") + AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT") + AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY") + AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT") + + AZURE_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_EMBEDDING_DEPLOYMENT") + AZURE_EMBEDDING_API_VERSION = os.getenv("AZURE_EMBEDDING_API_VERSION") + + WORKING_DIR = "C:/Users/user/testfolder" + + async def llm_model_func( + prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs + ) -> str: + client = AzureOpenAI( + api_key=AZURE_OPENAI_API_KEY, + api_version=AZURE_OPENAI_API_VERSION, + azure_endpoint=AZURE_OPENAI_ENDPOINT, + ) + + messages = [] + if system_prompt: + messages.append({"role": "system", "content": system_prompt}) + if history_messages: + messages.extend(history_messages) + messages.append({"role": "user", "content": prompt}) + + chat_completion = client.chat.completions.create( + model=AZURE_OPENAI_DEPLOYMENT, + messages=messages, + temperature=kwargs.get("temperature", 0.5), + max_tokens=kwargs.get("max_tokens", 1500) + ) + + return chat_completion.choices[0].message.content + + async def embedding_func(texts: list[str]) -> np.ndarray: + client = AzureOpenAI( + api_key=AZURE_OPENAI_API_KEY, + api_version=AZURE_EMBEDDING_API_VERSION, + azure_endpoint=AZURE_OPENAI_ENDPOINT, + ) + embedding = client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts) + + embeddings = [item.embedding for item in embedding.data] + return np.array(embeddings) + + # async def test_funcs(): + # try: + # result_llm = await llm_model_func("How are you?", system_prompt="Act as a friendly assistant.") + # print("Response from llm_model_func: ", result_llm) + + # result_embedding = await embedding_func(["How are you?"]) + # print("Result from embedding_func: ", result_embedding.shape) + # print("Embedding dimension: ", result_embedding.shape[1]) + # except Exception as e: + # print(f"An error occurred in test_funcs: {e}") + + # asyncio.run(test_funcs()) + + embedding_dimension = 1536 + rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=llm_model_func, @@ -93,34 +225,42 @@ async def initialize_rag(): ), ) - await rag.initialize_storages() - await initialize_pipeline_status() + def query_rag(question): + """ + Execute a query in hybrid mode. + """ + try: + start_time = time.time() + response = rag.query( + question, + param=QueryParam(mode="hybrid") # Set hybrid mode + ) + if response is None or not response.strip(): + print("No relevant answer found. Check if the database is correctly populated.") + return - return rag + print("\n--- Answer ---") + print(response) + print("\n") + duration = time.time() - start_time + print(f"The answer took {duration} seconds.") + except Exception as e: + print(f"An error occurred while executing the query: {e}") + print("LightRAG - Interactive Questioning") + print(f"Version: {VERSION}") + print("Type 'exit' to terminate the program.\n") -def main(): - rag = asyncio.run(initialize_rag()) + while True: + try: + question = input("Ask your question: ") - book1 = open("./book_1.txt", encoding="utf-8") - book2 = open("./book_2.txt", encoding="utf-8") + if question.lower() in ["exit", "quit"]: + print("Program terminated.") + break + query_rag(question) + except: + print("Error answering the question") - rag.insert([book1.read(), book2.read()]) - - query_text = "What are the main themes?" - - print("Result (Naive):") - print(rag.query(query_text, param=QueryParam(mode="naive"))) - - print("\nResult (Local):") - print(rag.query(query_text, param=QueryParam(mode="local"))) - - print("\nResult (Global):") - print(rag.query(query_text, param=QueryParam(mode="global"))) - - print("\nResult (Hybrid):") - print(rag.query(query_text, param=QueryParam(mode="hybrid"))) - - -if __name__ == "__main__": - main() +if __name__ == '__main__': +