fix: create enrichments

This commit is contained in:
lxobr 2025-10-21 00:57:42 +02:00
parent 834cf8b113
commit 8e580bd3d3
3 changed files with 16 additions and 12 deletions

View file

@ -68,17 +68,13 @@ async def _generate_enrichment_report(
async def _create_enrichment_datapoint( async def _create_enrichment_datapoint(
improved_answer_item: Dict, improved_answer_item: Dict,
report_text: str, report_text: str,
nodeset: NodeSet,
) -> Optional[FeedbackEnrichment]: ) -> Optional[FeedbackEnrichment]:
"""Create a single FeedbackEnrichment DataPoint with proper ID and nodeset assignment.""" """Create a single FeedbackEnrichment DataPoint with proper ID and nodeset assignment."""
try: try:
question = improved_answer_item["question"] question = improved_answer_item["question"]
improved_answer = improved_answer_item["improved_answer"] improved_answer = improved_answer_item["improved_answer"]
# Create nodeset following UserQAFeedback pattern
nodeset = NodeSet(
id=uuid5(NAMESPACE_OID, name="FeedbackEnrichment"), name="FeedbackEnrichment"
)
enrichment = FeedbackEnrichment( enrichment = FeedbackEnrichment(
id=str(uuid5(NAMESPACE_OID, f"{question}_{improved_answer}")), id=str(uuid5(NAMESPACE_OID, f"{question}_{improved_answer}")),
text=report_text, text=report_text,
@ -87,7 +83,7 @@ async def _create_enrichment_datapoint(
improved_answer=improved_answer, improved_answer=improved_answer,
feedback_id=improved_answer_item["feedback_id"], feedback_id=improved_answer_item["feedback_id"],
interaction_id=improved_answer_item["interaction_id"], interaction_id=improved_answer_item["interaction_id"],
belongs_to_set=nodeset, belongs_to_set=[nodeset],
) )
return enrichment return enrichment
@ -119,6 +115,9 @@ async def create_enrichments(
logger.info("Creating enrichments", count=len(improved_answers)) logger.info("Creating enrichments", count=len(improved_answers))
# Create nodeset once for all enrichments
nodeset = NodeSet(id=uuid5(NAMESPACE_OID, name="FeedbackEnrichment"), name="FeedbackEnrichment")
enrichments: List[FeedbackEnrichment] = [] enrichments: List[FeedbackEnrichment] = []
for improved_answer_item in improved_answers: for improved_answer_item in improved_answers:
@ -130,7 +129,7 @@ async def create_enrichments(
question, improved_answer, new_context, report_prompt_location question, improved_answer, new_context, report_prompt_location
) )
enrichment = await _create_enrichment_datapoint(improved_answer_item, report_text) enrichment = await _create_enrichment_datapoint(improved_answer_item, report_text, nodeset)
if enrichment: if enrichment:
enrichments.append(enrichment) enrichments.append(enrichment)

View file

@ -18,4 +18,4 @@ class FeedbackEnrichment(DataPoint):
improved_answer: str improved_answer: str
feedback_id: UUID feedback_id: UUID
interaction_id: UUID interaction_id: UUID
belongs_to_set: Optional[NodeSet] = None belongs_to_set: Optional[List[NodeSet]] = None

View file

@ -3,6 +3,9 @@ import asyncio
import cognee import cognee
from cognee.api.v1.search import SearchType from cognee.api.v1.search import SearchType
from cognee.modules.pipelines.tasks.task import Task from cognee.modules.pipelines.tasks.task import Task
from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.storage import add_data_points
from cognee.shared.data_models import KnowledgeGraph
from cognee.tasks.feedback.extract_feedback_interactions import extract_feedback_interactions from cognee.tasks.feedback.extract_feedback_interactions import extract_feedback_interactions
from cognee.tasks.feedback.generate_improved_answers import generate_improved_answers from cognee.tasks.feedback.generate_improved_answers import generate_improved_answers
@ -49,12 +52,14 @@ async def run_question_and_submit_feedback(question_text: str) -> bool:
async def run_feedback_enrichment_memify(last_n: int = 5): async def run_feedback_enrichment_memify(last_n: int = 5):
"""Execute memify with extraction, answer improvement, and enrichment creation tasks.""" """Execute memify with extraction, answer improvement, enrichment creation, and graph processing tasks."""
# Instantiate tasks with their own kwargs # Instantiate tasks with their own kwargs
extraction_tasks = [Task(extract_feedback_interactions, last_n=last_n)] extraction_tasks = [Task(extract_feedback_interactions, last_n=last_n)]
enrichment_tasks = [ enrichment_tasks = [
Task(generate_improved_answers, retriever_name="graph_completion_cot", top_k=20), Task(generate_improved_answers, retriever_name="graph_completion_cot", top_k=20),
Task(create_enrichments), Task(create_enrichments),
Task(extract_graph_from_data, graph_model=KnowledgeGraph, task_config={"batch_size": 10}),
Task(add_data_points, task_config={"batch_size": 10}),
] ]
await cognee.memify( await cognee.memify(
extraction_tasks=extraction_tasks, extraction_tasks=extraction_tasks,
@ -65,9 +70,9 @@ async def run_feedback_enrichment_memify(last_n: int = 5):
async def main(): async def main():
await initialize_conversation_and_graph(CONVERSATION) # await initialize_conversation_and_graph(CONVERSATION)
is_correct = await run_question_and_submit_feedback("Who told Bob to bring the donuts?") # is_correct = await run_question_and_submit_feedback("Who told Bob to bring the donuts?")
# is_correct = False is_correct = False
if not is_correct: if not is_correct:
await run_feedback_enrichment_memify(last_n=5) await run_feedback_enrichment_memify(last_n=5)