This commit is contained in:
Raphaël MANSUY 2025-12-04 19:19:03 +08:00
parent a6ae023072
commit d2b9a36d92
2 changed files with 26 additions and 0 deletions

View file

@ -1184,6 +1184,7 @@ async def pipeline_enqueue_file(
_convert_with_docling, file_path _convert_with_docling, file_path
) )
else: else:
<<<<<<< HEAD
<<<<<<< HEAD <<<<<<< HEAD
if not pm.is_installed("pypdf2"): # type: ignore if not pm.is_installed("pypdf2"): # type: ignore
pm.install("pypdf2") pm.install("pypdf2")
@ -1195,13 +1196,18 @@ async def pipeline_enqueue_file(
for page in reader.pages: for page in reader.pages:
content += page.extract_text() + "\n" content += page.extract_text() + "\n"
======= =======
=======
>>>>>>> 69a0b74c (refactor: move document deps to api group, remove dynamic imports)
# Use pypdf (non-blocking via to_thread) # Use pypdf (non-blocking via to_thread)
content = await asyncio.to_thread( content = await asyncio.to_thread(
_extract_pdf_pypdf, _extract_pdf_pypdf,
file, file,
global_args.pdf_decrypt_password, global_args.pdf_decrypt_password,
) )
<<<<<<< HEAD
>>>>>>> 4b31942e (refactor: move document deps to api group, remove dynamic imports) >>>>>>> 4b31942e (refactor: move document deps to api group, remove dynamic imports)
=======
>>>>>>> 69a0b74c (refactor: move document deps to api group, remove dynamic imports)
except Exception as e: except Exception as e:
error_files = [ error_files = [
{ {

View file

@ -92,6 +92,7 @@ docling = [
# Offline deployment dependencies (layered design for flexibility) # Offline deployment dependencies (layered design for flexibility)
<<<<<<< HEAD <<<<<<< HEAD
<<<<<<< HEAD
offline-docs = [ offline-docs = [
# Document processing dependencies # Document processing dependencies
"docling>=1.0.0,<3.0.0", "docling>=1.0.0,<3.0.0",
@ -103,6 +104,8 @@ offline-docs = [
======= =======
>>>>>>> 4b31942e (refactor: move document deps to api group, remove dynamic imports) >>>>>>> 4b31942e (refactor: move document deps to api group, remove dynamic imports)
=======
>>>>>>> 69a0b74c (refactor: move document deps to api group, remove dynamic imports)
offline-storage = [ offline-storage = [
# Storage backend dependencies # Storage backend dependencies
"redis>=5.0.0,<7.0.0", "redis>=5.0.0,<7.0.0",
@ -127,6 +130,23 @@ offline-llm = [
offline = [ offline = [
# Complete offline package (includes api for document processing, plus storage and LLM) # Complete offline package (includes api for document processing, plus storage and LLM)
"lightrag-hku[api,offline-storage,offline-llm]", "lightrag-hku[api,offline-storage,offline-llm]",
<<<<<<< HEAD
=======
]
evaluation = [
# RAG evaluation dependencies (RAGAS framework)
"ragas>=0.3.7",
"datasets>=4.3.0",
"httpx>=0.28.1",
"pytest>=8.4.2",
"pytest-asyncio>=1.2.0",
]
observability = [
# LLM observability and tracing dependencies
"langfuse>=3.8.1",
>>>>>>> 69a0b74c (refactor: move document deps to api group, remove dynamic imports)
] ]
[project.scripts] [project.scripts]