# cognee/examples/usage_frequency_example.py

"""
End-to-end example demonstrating usage frequency tracking in Cognee.

This example shows how to:

1. Add data and build a knowledge graph
2. Run searches with save_interaction=True to track usage
3. Extract and apply frequency weights using the memify pipeline
4. Query and analyze the frequency data

The frequency weights can be used to:

- Rank frequently referenced entities higher during retrieval
- Adjust scoring for completion strategies
- Expose usage metrics in dashboards or audits
"""

import asyncio
from datetime import timedelta
from typing import List

import cognee
from cognee.api.v1.search import SearchType
from cognee.tasks.memify.extract_usage_frequency import (
    create_usage_frequency_pipeline,
    run_usage_frequency_update,
)
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
from cognee.shared.logging_utils import get_logger

logger = get_logger("usage_frequency_example")

async def setup_knowledge_base():
    """Set up a fresh knowledge base with sample data."""
    logger.info("Setting up knowledge base...")

    # Reset cognee state for a clean slate
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Sample conversation about AI/ML topics
    conversation = [
        "Alice discusses machine learning algorithms and their applications in computer vision.",
        "Bob asks about neural networks and how they differ from traditional algorithms.",
        "Alice explains deep learning concepts including CNNs and transformers.",
        "Bob wants more details about neural networks and backpropagation.",
        "Alice describes reinforcement learning and its use in robotics.",
        "Bob inquires about natural language processing and transformers.",
    ]

    # Add conversation data and build the knowledge graph
    logger.info("Adding conversation data...")
    await cognee.add(conversation, dataset_name="ai_ml_conversation")

    logger.info("Building knowledge graph (cognify)...")
    await cognee.cognify()

    logger.info("Knowledge base setup complete")


async def simulate_user_searches():
    """Simulate multiple user searches to generate interaction data."""
    logger.info("Simulating user searches with save_interaction=True...")

    # Different queries that will create CogneeUserInteraction nodes
    queries = [
        "What is machine learning?",
        "Explain neural networks",
        "Tell me about deep learning",
        "What are neural networks?",  # Repeat to increase frequency
        "How does machine learning work?",
        "Describe transformers in NLP",
        "What is reinforcement learning?",
        "Explain neural networks again",  # Another repeat
    ]

    search_count = 0
    for query in queries:
        try:
            logger.info(f"Searching: '{query}'")
            results = await cognee.search(
                query_type=SearchType.GRAPH_COMPLETION,
                query_text=query,
                save_interaction=True,  # Critical: saves the interaction to the graph
                top_k=5,
            )
            search_count += 1
            logger.debug(f"Search completed, got {len(results) if results else 0} results")
        except Exception as e:
            logger.warning(f"Search failed for '{query}': {e}")

    logger.info(f"Completed {search_count} searches with interactions saved")
    return search_count


async def retrieve_interaction_graph() -> List[CogneeGraph]:
    """Retrieve the graph containing interaction nodes."""
    logger.info("Retrieving graph with interaction data...")

    graph_engine = await get_graph_engine()
    graph = CogneeGraph()

    # Project the full graph, including CogneeUserInteraction nodes
    await graph.project_graph_from_db(
        adapter=graph_engine,
        node_properties_to_project=["type", "node_type", "timestamp", "created_at", "text", "name"],
        edge_properties_to_project=["relationship_type", "timestamp", "created_at"],
        directed=True,
    )

    logger.info(f"Retrieved graph: {len(graph.nodes)} nodes, {len(graph.edges)} edges")

    # Count interaction nodes for verification
    interaction_count = sum(
        1
        for node in graph.nodes.values()
        if node.attributes.get("type") == "CogneeUserInteraction"
        or node.attributes.get("node_type") == "CogneeUserInteraction"
    )
    logger.info(f"Found {interaction_count} CogneeUserInteraction nodes in graph")

    return [graph]


async def run_frequency_pipeline_method1():
    """Method 1: Using the pipeline creation function."""
    logger.info("\n=== Method 1: Using create_usage_frequency_pipeline ===")

    graph_engine = await get_graph_engine()
    subgraphs = await retrieve_interaction_graph()

    # Create the pipeline tasks
    extraction_tasks, enrichment_tasks = await create_usage_frequency_pipeline(
        graph_adapter=graph_engine,
        time_window=timedelta(days=30),  # Last 30 days
        min_interaction_threshold=1,  # Count all interactions
        batch_size=100,
    )

    logger.info("Running extraction tasks...")
    # Note: in a real memify pipeline these tasks would be executed by the pipeline
    # runner; for this example we execute them manually.
    result = None
    for task in extraction_tasks:
        if hasattr(task, "function"):
            result = await task.function(
                subgraphs=subgraphs,
                time_window=timedelta(days=30),
                min_interaction_threshold=1,
            )
            logger.info(f"Extraction result: {result.get('interactions_in_window')} interactions processed")

    logger.info("Running enrichment tasks...")
    for task in enrichment_tasks:
        # Only enrich if the extraction step actually produced frequencies
        if hasattr(task, "function") and result is not None:
            await task.function(
                graph_adapter=graph_engine,
                usage_frequencies=result,
            )

    return result


async def run_frequency_pipeline_method2():
    """Method 2: Using the convenience function."""
    logger.info("\n=== Method 2: Using run_usage_frequency_update ===")

    graph_engine = await get_graph_engine()
    subgraphs = await retrieve_interaction_graph()

    # Run the complete pipeline in one call
    stats = await run_usage_frequency_update(
        graph_adapter=graph_engine,
        subgraphs=subgraphs,
        time_window=timedelta(days=30),
        min_interaction_threshold=1,
    )

    logger.info("Frequency update statistics:")
    logger.info(f"  Total interactions: {stats['total_interactions']}")
    logger.info(f"  Interactions in window: {stats['interactions_in_window']}")
    logger.info(f"  Nodes with frequency weights: {len(stats['node_frequencies'])}")
    logger.info(f"  Element types: {stats.get('element_type_frequencies', {})}")

    return stats


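# Illustrative helper, not part of the Cognee API: a quick way to peek at the
# heaviest entries in the stats returned above. It assumes
# stats["node_frequencies"] maps node IDs to interaction counts, which is an
# assumption about extract_usage_frequency's output rather than a documented contract.
def top_frequency_entries(stats: dict, limit: int = 5) -> list:
    """Return the `limit` highest-count (node_id, count) pairs from the stats dict."""
    node_frequencies = stats.get("node_frequencies") or {}
    return sorted(node_frequencies.items(), key=lambda item: item[1], reverse=True)[:limit]

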
async def analyze_frequency_weights():
    """Analyze and display the frequency weights that were added."""
    logger.info("\n=== Analyzing Frequency Weights ===")

    graph_engine = await get_graph_engine()
    graph = CogneeGraph()

    # Project the graph with frequency weights included
    await graph.project_graph_from_db(
        adapter=graph_engine,
        node_properties_to_project=[
            "type",
            "node_type",
            "text",
            "name",
            "frequency_weight",  # Our added property
            "frequency_updated_at",
        ],
        edge_properties_to_project=["relationship_type"],
        directed=True,
    )

    # Find nodes with frequency weights
    weighted_nodes = []
    for node_id, node in graph.nodes.items():
        freq_weight = node.attributes.get("frequency_weight")
        if freq_weight is not None:
            weighted_nodes.append(
                {
                    "id": node_id,
                    "type": node.attributes.get("type") or node.attributes.get("node_type"),
                    "text": (node.attributes.get("text") or "")[:100],  # First 100 chars
                    "name": node.attributes.get("name", ""),
                    "frequency_weight": freq_weight,
                    "updated_at": node.attributes.get("frequency_updated_at"),
                }
            )

    # Sort by frequency (descending)
    weighted_nodes.sort(key=lambda x: x["frequency_weight"], reverse=True)

    logger.info(f"\nFound {len(weighted_nodes)} nodes with frequency weights:")
    logger.info("\nTop 10 Most Frequently Referenced Elements:")
    logger.info("-" * 80)

    for i, node in enumerate(weighted_nodes[:10], 1):
        logger.info(f"\n{i}. Frequency: {node['frequency_weight']}")
        logger.info(f"   Type: {node['type']}")
        logger.info(f"   Name: {node['name']}")
        logger.info(f"   Text: {node['text']}")
        logger.info(f"   ID: {str(node['id'])[:50]}...")

    return weighted_nodes


async def demonstrate_retrieval_with_frequencies():
    """Demonstrate how frequency weights can be used in retrieval."""
    logger.info("\n=== Demonstrating Retrieval with Frequency Weights ===")

    # This is a conceptual demonstration of how frequency weights
    # could be used to boost search results.

    query = "neural networks"
    logger.info(f"Searching for: '{query}'")

    try:
        # Standard search
        standard_results = await cognee.search(
            query_type=SearchType.GRAPH_COMPLETION,
            query_text=query,
            save_interaction=False,  # Don't add more interactions
            top_k=5,
        )

        logger.info(f"Standard search returned {len(standard_results) if standard_results else 0} results")

        # Note: to actually use frequency_weight in scoring, you would need to:
        # 1. Modify the retrieval/ranking logic to consider frequency_weight
        # 2. Add frequency_weight as a scoring factor in the completion strategy
        # 3. Use it in analytics dashboards to show popular topics
        # (See the illustrative rerank_results_by_frequency sketch below.)

        logger.info("\nFrequency weights can now be used for:")
        logger.info("  - Boosting frequently accessed nodes in search rankings")
        logger.info("  - Adjusting triplet importance scores")
        logger.info("  - Building usage analytics dashboards")
        logger.info("  - Identifying 'hot' topics in the knowledge graph")

    except Exception as e:
        logger.warning(f"Demonstration search failed: {e}")


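# Illustrative sketch only (the name and blending formula are assumptions, not
# Cognee API): one way the frequency_weight added by the enrichment step could be
# folded into ranking, as outlined in the note inside
# demonstrate_retrieval_with_frequencies() above.
def rerank_results_by_frequency(scored_nodes, frequency_boost: float = 0.2):
    """Re-rank (node, base_score) pairs by blending in each node's frequency_weight.

    `scored_nodes` is assumed to be an iterable of (node, score) pairs where each
    node exposes `.attributes` like the CogneeGraph nodes projected above; the
    blending formula is an illustrative choice, not a Cognee default.
    """

    def blended_score(item):
        node, base_score = item
        frequency = node.attributes.get("frequency_weight") or 0
        # Boost the base score in proportion to how often the node was referenced.
        return base_score * (1 + frequency_boost * frequency)

    return sorted(scored_nodes, key=blended_score, reverse=True)

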
async def main():
    """Main execution flow."""
    logger.info("=" * 80)
    logger.info("Usage Frequency Tracking Example")
    logger.info("=" * 80)

    try:
        # Step 1: Set up the knowledge base
        await setup_knowledge_base()

        # Step 2: Simulate user searches with save_interaction=True
        search_count = await simulate_user_searches()

        if search_count == 0:
            logger.warning("No searches completed - cannot demonstrate frequency tracking")
            return

        # Step 3: Run frequency extraction and enrichment.
        # You can use either method - both accomplish the same thing.

        # Option A: Using the convenience function (recommended)
        stats = await run_frequency_pipeline_method2()

        # Option B: Using the pipeline creation function (for custom pipelines)
        # stats = await run_frequency_pipeline_method1()

        # Step 4: Analyze the results
        weighted_nodes = await analyze_frequency_weights()

        # Step 5: Demonstrate retrieval usage
        await demonstrate_retrieval_with_frequencies()

        # Summary
        logger.info("\n" + "=" * 80)
        logger.info("SUMMARY")
        logger.info("=" * 80)
        logger.info(f"Searches performed: {search_count}")
        logger.info(f"Interactions tracked: {stats.get('interactions_in_window', 0)}")
        logger.info(f"Nodes weighted: {len(weighted_nodes)}")
        logger.info(f"Time window: {stats.get('time_window_days', 0)} days")
        logger.info("\nFrequency weights have been added to the graph!")
        logger.info("These can now be used in retrieval, ranking, and analytics.")
        logger.info("=" * 80)

    except Exception as e:
        logger.error(f"Example failed: {e}", exc_info=True)
        raise


if __name__ == "__main__":
    asyncio.run(main())