This commit is contained in:
hzywhite 2025-09-08 09:55:35 +08:00
parent 0dc11e0794
commit 173baf96b9
7 changed files with 149 additions and 56 deletions

View file

@ -176,7 +176,7 @@ LLM_BINDING_API_KEY=your_api_key
### OpenAI Specific Parameters
### To mitigate endless output, set the temperature to a higher value
# OPENAI_LLM_TEMPERATURE=0.8
# OPENAI_LLM_TEMPERATURE=0.95
### OpenRouter Specific Parameters
# OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'

View file

@ -1 +1 @@
__api_version__ = "0214"
__api_version__ = "0215"

View file

@ -474,7 +474,7 @@ class LightRAG:
self.embedding_func = priority_limit_async_func_call(
self.embedding_func_max_async,
llm_timeout=self.default_embedding_timeout,
queue_name="Embedding func:",
queue_name="Embedding func",
)(self.embedding_func)
# Initialize all storages
@ -571,7 +571,7 @@ class LightRAG:
self.llm_model_func = priority_limit_async_func_call(
self.llm_model_max_async,
llm_timeout=self.default_llm_timeout,
queue_name="LLM func:",
queue_name="LLM func",
)(
partial(
self.llm_model_func, # type: ignore

View file

@ -31,6 +31,7 @@ from .utils import (
process_chunks_unified,
build_file_path,
safe_vdb_operation_with_exception,
create_prefixed_exception,
)
from .base import (
BaseGraphStorage,
@ -1395,9 +1396,9 @@ async def _merge_edges_then_upsert(
# Log based on actual LLM usage
if llm_was_used:
status_message = f"LLMmrg: `{src_id} - {tgt_id}` | {already_fragment}+{num_fragment-already_fragment}{dd_message}"
status_message = f"LLMmrg: `{src_id}`~`{tgt_id}` | {already_fragment}+{num_fragment-already_fragment}{dd_message}"
else:
status_message = f"Merged: `{src_id} - {tgt_id}` | {already_fragment}+{num_fragment-already_fragment}{dd_message}"
status_message = f"Merged: `{src_id}`~`{tgt_id}` | {already_fragment}+{num_fragment-already_fragment}{dd_message}"
logger.info(status_message)
if pipeline_status is not None and pipeline_status_lock is not None:
@ -1622,8 +1623,11 @@ async def merge_nodes_and_edges(
f"Failed to update pipeline status: {status_error}"
)
# Re-raise the original exception
raise
# Re-raise the original exception with a prefix
prefixed_exception = create_prefixed_exception(
e, f"`{entity_name}`"
)
raise prefixed_exception from e
# Create entity processing tasks
entity_tasks = []
@ -1753,8 +1757,11 @@ async def merge_nodes_and_edges(
f"Failed to update pipeline status: {status_error}"
)
# Re-raise the original exception
raise
# Re-raise the original exception with a prefix
prefixed_exception = create_prefixed_exception(
e, f"{sorted_edge_key}"
)
raise prefixed_exception from e
# Create relationship processing tasks
edge_tasks = []
@ -2098,11 +2105,14 @@ async def extract_entities(
if pending:
await asyncio.wait(pending)
# Re-raise the first exception to notify the caller
raise first_exception
# Add progress prefix to the exception message
progress_prefix = f"Chunks[{processed_chunks+1}/{total_chunks}]"
# Re-raise the original exception with a prefix
prefixed_exception = create_prefixed_exception(first_exception, progress_prefix)
raise prefixed_exception from first_exception
# If all tasks completed successfully, chunk_results already contains the results
# Return the chunk_results for later processing in merge_nodes_and_edges
return chunk_results

View file

@ -6,18 +6,20 @@ PROMPTS: dict[str, Any] = {}
PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
# TODO: Deprecated
PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
PROMPTS["DEFAULT_USER_PROMPT"] = "n/a"
PROMPTS["entity_extraction"] = """---Task---
For a given text and a list of entity types, extract all entities and their relationships, then return them in the specified language and format described below.
For a given input text and entity types in the provided real data, extract all entities and their relationships, then return them in the specified language and format described below.
---Instructions---
1. Recognizing definitively conceptualized entities in text. For each identified entity, extract the following information:
- entity_name: Name of the entity, use same language as input text. If English, capitalized the name
- entity_type: Categorize the entity using the provided `Entity_types` list. If a suitable category cannot be determined, classify it as `Other`.
- entity_type: Categorize the entity using the provided entity types. If a suitable category cannot be determined, classify it as `Other`.
- entity_description: Provide a comprehensive description of the entity's attributes and activities based on the information present in the input text. To ensure clarity and precision, all descriptions must replace pronouns and referential terms (e.g., "this document," "our company," "I," "you," "he/she") with the specific nouns they represent.
2. Format each entity as: (entity{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)
3. From the entities identified, identify all pairs of (source_entity, target_entity) that are directly and clearly related, and extract the following information:
@ -41,14 +43,15 @@ For a given text and a list of entity types, extract all entities and their rela
---Examples---
{examples}
---Input---
---Real Data---
<Input>
Entity_types: [{entity_types}]
Text:
```
{input_text}
```
---Output---
<Output>
"""
PROMPTS["entity_continue_extraction"] = """---Task---
@ -57,16 +60,17 @@ Identify any missed entities or relationships in the last extraction task.
---Instructions---
1. Output the entities and realtionships in the same format as previous extraction task.
2. Do not include entities and relations that have been previously extracted.
3. If the entity doesn't clearly fit in any of`Entity_types` provided, classify it as "Other".
4. Ensure the output language is {language}.
3. If the entity doesn't clearly fit in any of entity types provided, classify it as "Other".
4. Output `{completion_delimiter}` when all the entities and relationships are extracted.
5. Ensure the output language is {language}.
---Output---
<Output>
"""
PROMPTS["entity_extraction_examples"] = [
"""[Example 1]
---Input---
<Input>
Entity_types: [organization,person,location,event,technology,equiment,product,Document,category]
Text:
```
@ -79,7 +83,7 @@ The underlying dismissal earlier seemed to falter, replaced by a glimpse of relu
It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
```
---Output---
<Output>
(entity{tuple_delimiter}Alex{tuple_delimiter}person{tuple_delimiter}Alex is a character who experiences frustration and is observant of the dynamics among other characters.){record_delimiter}
(entity{tuple_delimiter}Taylor{tuple_delimiter}person{tuple_delimiter}Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective.){record_delimiter}
(entity{tuple_delimiter}Jordan{tuple_delimiter}person{tuple_delimiter}Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device.){record_delimiter}
@ -95,7 +99,7 @@ It was a small transformation, barely perceptible, but one that Alex noted with
""",
"""[Example 2]
---Input---
<Input>
Entity_types: [organization,person,location,event,technology,equiment,product,Document,category]
Text:
```
@ -108,7 +112,7 @@ Meanwhile, commodity markets reflected a mixed sentiment. Gold futures rose by 1
Financial experts are closely watching the Federal Reserve's next move, as speculation grows over potential rate hikes. The upcoming policy announcement is expected to influence investor confidence and overall market stability.
```
---Output---
<Output>
(entity{tuple_delimiter}Global Tech Index{tuple_delimiter}category{tuple_delimiter}The Global Tech Index tracks the performance of major technology stocks and experienced a 3.4% decline today.){record_delimiter}
(entity{tuple_delimiter}Nexon Technologies{tuple_delimiter}organization{tuple_delimiter}Nexon Technologies is a tech company that saw its stock decline by 7.8% after disappointing earnings.){record_delimiter}
(entity{tuple_delimiter}Omega Energy{tuple_delimiter}organization{tuple_delimiter}Omega Energy is an energy company that gained 2.1% in stock value due to rising oil prices.){record_delimiter}
@ -126,14 +130,14 @@ Financial experts are closely watching the Federal Reserve's next move, as specu
""",
"""[Example 3]
---Input---
<Input>
Entity_types: [organization,person,location,event,technology,equiment,product,Document,category]
Text:
```
At the World Athletics Championship in Tokyo, Noah Carter broke the 100m sprint record using cutting-edge carbon-fiber spikes.
```
---Output---
<Output>
(entity{tuple_delimiter}World Athletics Championship{tuple_delimiter}event{tuple_delimiter}The World Athletics Championship is a global sports competition featuring top athletes in track and field.){record_delimiter}
(entity{tuple_delimiter}Tokyo{tuple_delimiter}location{tuple_delimiter}Tokyo is the host city of the World Athletics Championship.){record_delimiter}
(entity{tuple_delimiter}Noah Carter{tuple_delimiter}person{tuple_delimiter}Noah Carter is a sprinter who set a new record in the 100m sprint at the World Athletics Championship.){record_delimiter}
@ -149,14 +153,14 @@ At the World Athletics Championship in Tokyo, Noah Carter broke the 100m sprint
""",
"""[Example 4]
---Input---
<Input>
Entity_types: [organization,person,location,event,technology,equiment,product,Document,category]
Text:
```
在北京举行的人工智能大会上腾讯公司的首席技术官张伟发布了最新的大语言模型"腾讯智言"该模型在自然语言处理方面取得了重大突破
```
---Output---
<Output>
(entity{tuple_delimiter}人工智能大会{tuple_delimiter}event{tuple_delimiter}人工智能大会是在北京举行的技术会议专注于人工智能领域的最新发展){record_delimiter}
(entity{tuple_delimiter}北京{tuple_delimiter}location{tuple_delimiter}北京是人工智能大会的举办城市){record_delimiter}
(entity{tuple_delimiter}腾讯公司{tuple_delimiter}organization{tuple_delimiter}腾讯公司是参与人工智能大会的科技企业发布了新的语言模型产品){record_delimiter}

View file

@ -473,12 +473,12 @@ def priority_limit_async_func_call(
nonlocal max_execution_timeout, max_task_duration
if max_execution_timeout is None:
max_execution_timeout = (
llm_timeout + 30
) # LLM timeout + 30s buffer for network delays
llm_timeout + 150
) # LLM timeout + 150s buffer for low-level retry
if max_task_duration is None:
max_task_duration = (
llm_timeout + 60
) # LLM timeout + 1min buffer for execution phase
llm_timeout + 180
) # LLM timeout + 180s buffer for health check phase
queue = asyncio.PriorityQueue(maxsize=max_queue_size)
tasks = set()
@ -2588,3 +2588,45 @@ def get_pinyin_sort_key(text: str) -> str:
else:
# pypinyin not available, use simple string sorting
return text.lower()
def create_prefixed_exception(original_exception: Exception, prefix: str) -> Exception:
    """Build a copy of an exception whose message is prefixed with ``prefix``.

    The helper tries to keep the original exception type so that callers'
    ``except`` clauses keep matching. It never raises itself: if the original
    type cannot be re-instantiated, a ``RuntimeError`` carrying the same
    information is returned instead.

    Args:
        original_exception: The exception to decorate. It is not modified.
        prefix: Text placed in front of the message, followed by ``": "``.

    Returns:
        A new exception of the same type as ``original_exception`` when its
        constructor accepts the rewritten arguments, otherwise a
        ``RuntimeError`` describing the original exception and the reason the
        reconstruction failed.
    """
    exc_type = type(original_exception)
    try:
        original_args = getattr(original_exception, "args", None)
        if original_args:
            # Method 1: rebuild from the original positional arguments.
            args = list(original_args)
            # Prefix the first string argument rather than blindly args[0]:
            # exceptions such as OSError carry an integer errno first.
            for index, arg in enumerate(args):
                if isinstance(arg, str):
                    args[index] = f"{prefix}: {arg}"
                    break
            else:
                # No string argument at all: prefix the string form of the
                # first argument instead.
                args[0] = f"{prefix}: {args[0]}"
            return exc_type(*args)
        # Method 2: no args available, try single-message construction.
        return exc_type(f"{prefix}: {str(original_exception)}")
    except Exception as construct_error:
        # Method 3: reconstruction failed. Custom exception constructors can
        # raise anything (not only TypeError/ValueError/AttributeError), and
        # this helper runs inside error handlers, so it must not raise a
        # secondary error that would mask the original one.
        return RuntimeError(
            f"{prefix}: {type(original_exception).__name__}: {str(original_exception)} "
            f"(Original exception could not be reconstructed: {construct_error})"
        )

View file

@ -34,7 +34,7 @@ import { errorMessage } from '@/lib/utils'
import { toast } from 'sonner'
import { useBackendState } from '@/stores/state'
import { RefreshCwIcon, ActivityIcon, ArrowUpIcon, ArrowDownIcon, RotateCcwIcon, CheckSquareIcon, XIcon } from 'lucide-react'
import { RefreshCwIcon, ActivityIcon, ArrowUpIcon, ArrowDownIcon, RotateCcwIcon, CheckSquareIcon, XIcon, AlertTriangle, Info } from 'lucide-react'
import PipelineStatusDialog from '@/components/documents/PipelineStatusDialog'
import { useScheme } from '@/contexts/SchemeContext';
@ -63,6 +63,26 @@ const getDisplayFileName = (doc: DocStatusResponse, maxLength: number = 20): str
: fileName;
};
/**
 * Render a document's metadata as pretty-printed JSON for display.
 *
 * `processing_start_time` and `processing_end_time` are multiplied by 1000
 * before being passed to `Date`, and are replaced by a locale-formatted
 * date string when they are non-zero numbers that yield a valid date.
 * All other fields are emitted unchanged. The input object is not mutated.
 */
const formatMetadata = (metadata: Record<string, any>): string => {
  // Work on a shallow copy so the caller's object stays untouched.
  const display: Record<string, any> = { ...metadata };

  for (const key of ['processing_start_time', 'processing_end_time']) {
    const seconds = display[key];
    // Skip missing, non-numeric, zero or NaN values.
    if (typeof seconds !== 'number' || !seconds) {
      continue;
    }
    const parsed = new Date(seconds * 1000);
    // Out-of-range timestamps produce an invalid Date; keep the raw value.
    if (isNaN(parsed.getTime())) {
      continue;
    }
    display[key] = parsed.toLocaleString();
  }

  return JSON.stringify(display, null, 2);
};
const pulseStyle = `
/* Tooltip styles */
.tooltip-container {
@ -77,6 +97,7 @@ const pulseStyle = `
white-space: normal;
border-radius: 0.375rem;
padding: 0.5rem 0.75rem;
font-size: 0.75rem; /* 12px */
background-color: rgba(0, 0, 0, 0.95);
color: white;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
@ -1222,29 +1243,45 @@ export default function DocumentManager() {
{doc.scheme_name || '-'}
</TableCell>
<TableCell>
{doc.status === 'processed' && (
<span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
)}
{doc.status === 'processing' && (
<span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
)}
{doc.status === 'pending' && (
<span className="text-yellow-600">{t('documentPanel.documentManager.status.pending')}</span>
)}
{doc.status === 'failed' && (
<span className="text-red-600">{t('documentPanel.documentManager.status.failed')}</span>
)}
{doc.status === 'ready' && (
<span className="text-purple-600">{t('documentPanel.documentManager.status.ready')}</span>
)}
{doc.status === 'handling' && (
<span className="text-gray-600">{t('documentPanel.documentManager.status.handling')}</span>
)}
{doc.error_msg && (
<span className="ml-2 text-red-500" title={doc.error_msg}>
</span>
)}
<div className="group relative flex items-center overflow-visible tooltip-container">
{doc.status === 'processed' && (
<span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
)}
{doc.status === 'processing' && (
<span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
)}
{doc.status === 'pending' && (
<span className="text-yellow-600">{t('documentPanel.documentManager.status.pending')}</span>
)}
{doc.status === 'failed' && (
<span className="text-red-600">{t('documentPanel.documentManager.status.failed')}</span>
)}
{doc.status === 'ready' && (
<span className="text-purple-600">{t('documentPanel.documentManager.status.ready')}</span>
)}
{doc.status === 'handling' && (
<span className="text-gray-600">{t('documentPanel.documentManager.status.handling')}</span>
)}
{/* Icon rendering logic */}
{doc.error_msg ? (
<AlertTriangle className="ml-2 h-4 w-4 text-yellow-500" />
) : (doc.metadata && Object.keys(doc.metadata).length > 0) && (
<Info className="ml-2 h-4 w-4 text-blue-500" />
)}
{/* Tooltip rendering logic */}
{(doc.error_msg || (doc.metadata && Object.keys(doc.metadata).length > 0)) && (
<div className="invisible group-hover:visible tooltip">
{doc.error_msg && (
<pre>{doc.error_msg}</pre>
)}
{doc.metadata && Object.keys(doc.metadata).length > 0 && (
<pre>{formatMetadata(doc.metadata)}</pre>
)}
</div>
)}
</div>
</TableCell>
<TableCell>{doc.content_length ?? '-'}</TableCell>
<TableCell>{doc.chunks_count ?? '-'}</TableCell>