#!/usr/bin/env python3
"""
End-to-End Example: Usage Frequency Tracking in Cognee

This example demonstrates the complete workflow for tracking and analyzing
how frequently different graph elements are accessed through user searches.

Features demonstrated:
- Setting up a knowledge base
- Running searches with interaction tracking (save_interaction=True)
- Extracting usage frequencies from interaction data
- Applying frequency weights to graph nodes
- Analyzing and visualizing the results

Use cases:
- Ranking search results by popularity
- Identifying "hot topics" in your knowledge base
- Understanding user behavior and interests
- Improving retrieval based on usage patterns
"""

import asyncio
import os
from datetime import timedelta
from typing import List, Dict, Any

from dotenv import load_dotenv

import cognee
from cognee.api.v1.search import SearchType
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
from cognee.tasks.memify.extract_usage_frequency import run_usage_frequency_update

# Load environment variables
load_dotenv()


# ============================================================================
# STEP 1: Setup and Configuration
# ============================================================================


async def setup_knowledge_base():
    """
    Create a fresh knowledge base with sample content.

    In a real application, you would:
    - Load documents from files, databases, or APIs
    - Process larger datasets
    - Organize content by datasets/categories
    """
    print("=" * 80)
    print("STEP 1: Setting up knowledge base")
    print("=" * 80)

    # Reset state for a clean demo (optional in production)
    print("\nResetting Cognee state...")
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    print("✓ Reset complete")

    # Sample content: AI/ML educational material
    documents = [
        """
        Machine Learning Fundamentals:
        Machine learning is a subset of artificial intelligence that enables
        systems to learn and improve from experience without being explicitly
        programmed. The three main types are supervised learning, unsupervised
        learning, and reinforcement learning.
        """,
        """
        Neural Networks Explained:
        Neural networks are computing systems inspired by biological neural
        networks. They consist of layers of interconnected nodes (neurons) that
        process information through weighted connections. Deep learning uses
        neural networks with many layers to automatically learn hierarchical
        representations of data.
        """,
        """
        Natural Language Processing:
        NLP enables computers to understand, interpret, and generate human
        language. Modern NLP uses transformer architectures like BERT and GPT,
        which have revolutionized tasks such as translation, summarization,
        and question answering.
        """,
        """
        Computer Vision Applications:
        Computer vision allows machines to interpret visual information from
        the world. Convolutional neural networks (CNNs) are particularly
        effective for image recognition, object detection, and image
        segmentation tasks.
        """,
    ]
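
    # In a real application you might load the documents from disk instead.
    # A hypothetical sketch (assumes a local ./docs directory of .txt files):
    #     from pathlib import Path
    #     documents = [path.read_text() for path in Path("docs").glob("*.txt")]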
""", ] print(f"\nAdding {len(documents)} documents to knowledge base...") await cognee.add(documents, dataset_name="ai_ml_fundamentals") print("✓ Documents added") # Build knowledge graph print("\nBuilding knowledge graph (cognify)...") await cognee.cognify() print("✓ Knowledge graph built") print("\n" + "=" * 80) # ============================================================================ # STEP 2: Simulate User Searches with Interaction Tracking # ============================================================================ async def simulate_user_searches(queries: List[str]): """ Simulate users searching the knowledge base. The key parameter is save_interaction=True, which creates: - CogneeUserInteraction nodes (one per search) - used_graph_element_to_answer edges (connecting queries to relevant nodes) Args: queries: List of search queries to simulate Returns: Number of successful searches """ print("=" * 80) print("STEP 2: Simulating user searches with interaction tracking") print("=" * 80) successful_searches = 0 for i, query in enumerate(queries, 1): print(f"\nSearch {i}/{len(queries)}: '{query}'") try: results = await cognee.search( query_type=SearchType.GRAPH_COMPLETION, query_text=query, save_interaction=True, # ← THIS IS CRITICAL! top_k=5, ) successful_searches += 1 # Show snippet of results result_preview = str(results)[:100] if results else "No results" print(f" ✓ Completed ({result_preview}...)") except Exception as e: print(f" ✗ Failed: {e}") print(f"\n✓ Completed {successful_searches}/{len(queries)} searches") print("=" * 80) return successful_searches # ============================================================================ # STEP 3: Extract and Apply Usage Frequencies # ============================================================================ async def extract_and_apply_frequencies( time_window_days: int = 7, min_threshold: int = 1 ) -> Dict[str, Any]: """ Extract usage frequencies from interactions and apply them to the graph. This function: 1. Retrieves the graph with interaction data 2. Counts how often each node was accessed 3. 


# ============================================================================
# STEP 3: Extract and Apply Usage Frequencies
# ============================================================================


async def extract_and_apply_frequencies(
    time_window_days: int = 7, min_threshold: int = 1
) -> Dict[str, Any]:
    """
    Extract usage frequencies from interactions and apply them to the graph.

    This function:
    1. Retrieves the graph with interaction data
    2. Counts how often each node was accessed
    3. Writes the frequency_weight property back to the nodes

    Args:
        time_window_days: Only count interactions from the last N days
        min_threshold: Minimum accesses to track (filter out rarely used nodes)

    Returns:
        Dictionary with statistics about the frequency update
    """
    print("=" * 80)
    print("STEP 3: Extracting and applying usage frequencies")
    print("=" * 80)

    # Get the graph adapter
    graph_engine = await get_graph_engine()

    # Retrieve the graph with interactions
    print("\nRetrieving graph from database...")
    graph = CogneeGraph()
    await graph.project_graph_from_db(
        adapter=graph_engine,
        node_properties_to_project=[
            "type",
            "node_type",
            "timestamp",
            "created_at",
            "text",
            "name",
            "query_text",
            "frequency_weight",
        ],
        edge_properties_to_project=["relationship_type", "timestamp"],
        directed=True,
    )
    print(f"✓ Retrieved: {len(graph.nodes)} nodes, {len(graph.edges)} edges")

    # Count interaction nodes
    interaction_nodes = [
        n
        for n in graph.nodes.values()
        if n.attributes.get("type") == "CogneeUserInteraction"
        or n.attributes.get("node_type") == "CogneeUserInteraction"
    ]
    print(f"✓ Found {len(interaction_nodes)} interaction nodes")

    # Run the frequency extraction and update
    print(f"\nExtracting frequencies (time window: {time_window_days} days)...")
    stats = await run_usage_frequency_update(
        graph_adapter=graph_engine,
        subgraphs=[graph],
        time_window=timedelta(days=time_window_days),
        min_interaction_threshold=min_threshold,
    )

    print("\n✓ Frequency extraction complete!")
    print(
        f"  - Interactions processed: {stats['interactions_in_window']}/{stats['total_interactions']}"
    )
    print(f"  - Nodes weighted: {len(stats['node_frequencies'])}")
    print(f"  - Element types tracked: {stats.get('element_type_frequencies', {})}")
    print("=" * 80)

    return stats
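

# Optional helper: the raw access counts in stats["node_frequencies"] can be
# normalized before further use. A minimal sketch -- dividing by the maximum
# count is an illustrative choice here, not something run_usage_frequency_update
# does itself.
def normalize_frequencies(node_frequencies: Dict[str, int]) -> Dict[str, float]:
    """Scale raw access counts into relative weights in (0, 1]."""
    if not node_frequencies:
        return {}
    max_count = max(node_frequencies.values())
    return {node_id: count / max_count for node_id, count in node_frequencies.items()}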


# ============================================================================
# STEP 4: Analyze and Display Results
# ============================================================================


async def analyze_results(stats: Dict[str, Any]):
    """
    Analyze and display the frequency tracking results.

    Shows:
    - Top most frequently accessed nodes
    - Element type distribution
    - Verification that weights were written to the database

    Args:
        stats: Statistics from the frequency extraction
    """
    print("=" * 80)
    print("STEP 4: Analyzing usage frequency results")
    print("=" * 80)

    # Display top nodes by frequency
    if stats["node_frequencies"]:
        print("\n📊 Top 10 Most Frequently Accessed Elements:")
        print("-" * 80)

        sorted_nodes = sorted(stats["node_frequencies"].items(), key=lambda x: x[1], reverse=True)

        # Get the graph to display node details
        graph_engine = await get_graph_engine()
        graph = CogneeGraph()
        await graph.project_graph_from_db(
            adapter=graph_engine,
            node_properties_to_project=["type", "text", "name"],
            edge_properties_to_project=[],
            directed=True,
        )

        for i, (node_id, frequency) in enumerate(sorted_nodes[:10], 1):
            node = graph.get_node(node_id)
            if node:
                node_type = node.attributes.get("type", "Unknown")
                text = node.attributes.get("text") or node.attributes.get("name") or ""
                text_preview = text[:60] + "..." if len(text) > 60 else text
                print(f"\n{i}. Frequency: {frequency} accesses")
                print(f"   Type: {node_type}")
                print(f"   Content: {text_preview}")
            else:
                print(f"\n{i}. Frequency: {frequency} accesses")
                # str() guards against non-string node IDs (e.g. UUIDs)
                print(f"   Node ID: {str(node_id)[:50]}...")

    # Display the element type distribution
    if stats.get("element_type_frequencies"):
        print("\n\n📈 Element Type Distribution:")
        print("-" * 80)
        type_dist = stats["element_type_frequencies"]
        for elem_type, count in sorted(type_dist.items(), key=lambda x: x[1], reverse=True):
            print(f"  {elem_type}: {count} accesses")

    # Verify weights in the database (Neo4j only)
    print("\n\n🔍 Verifying weights in database...")
    print("-" * 80)

    graph_engine = await get_graph_engine()
    adapter_type = type(graph_engine).__name__

    if adapter_type == "Neo4jAdapter":
        try:
            result = await graph_engine.query(
                """
                MATCH (n)
                WHERE n.frequency_weight IS NOT NULL
                RETURN count(n) as weighted_count
                """
            )
            count = result[0]["weighted_count"] if result else 0

            if count > 0:
                print(f"✓ {count} nodes have frequency_weight in Neo4j database")

                # Show a sample
                sample = await graph_engine.query(
                    """
                    MATCH (n)
                    WHERE n.frequency_weight IS NOT NULL
                    RETURN n.frequency_weight as weight, labels(n) as labels
                    ORDER BY n.frequency_weight DESC
                    LIMIT 3
                    """
                )
                print("\nSample weighted nodes:")
                for row in sample:
                    print(f"  - Weight: {row['weight']}, Type: {row['labels']}")
            else:
                print("⚠ No nodes with frequency_weight found in database")
        except Exception as e:
            print(f"Could not verify in Neo4j: {e}")
    else:
        print(f"Database verification not implemented for {adapter_type}")

    print("\n" + "=" * 80)


# ============================================================================
# STEP 5: Demonstrate Usage in Retrieval
# ============================================================================


async def demonstrate_retrieval_usage():
    """
    Demonstrate how frequency weights can be used in retrieval.

    Note: This is a conceptual demonstration. To actually use frequency
    weights in ranking, you would need to modify the retrieval/completion
    strategies to incorporate the frequency_weight property.
    """
    print("=" * 80)
    print("STEP 5: How to use frequency weights in retrieval")
    print("=" * 80)

    print("""
    Frequency weights can be used to improve search results:

    1. RANKING BOOST:
       - Multiply relevance scores by frequency_weight
       - Prioritize frequently accessed nodes in results

    2. COMPLETION STRATEGIES:
       - Adjust triplet importance based on usage
       - Filter out rarely accessed information

    3. ANALYTICS:
       - Track trending topics over time
       - Understand user interests and behavior
       - Identify knowledge gaps (low-frequency nodes)

    4. ADAPTIVE RETRIEVAL:
       - Personalize results based on team usage patterns
       - Surface popular answers faster

    Example Cypher query with frequency boost (Neo4j):

        MATCH (n) WHERE n.text CONTAINS $search_term
        RETURN n, n.frequency_weight as boost
        ORDER BY (n.relevance_score * COALESCE(n.frequency_weight, 1)) DESC
        LIMIT 10

    To integrate this into Cognee, you would modify the completion strategy
    to include frequency_weight in the scoring function.
    """)
    print("=" * 80)
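

# A minimal sketch of the "RANKING BOOST" idea printed above. Purely
# illustrative: the result-dict shape and the relevance_score /
# frequency_weight keys are assumptions, not Cognee's actual retrieval API --
# a real integration would live inside the completion strategy.
def rerank_by_frequency(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    def boosted_score(result: Dict[str, Any]) -> float:
        relevance = result.get("relevance_score", 0.0)
        frequency = result.get("frequency_weight", 1.0)  # missing weight -> no boost
        return relevance * frequency

    return sorted(results, key=boosted_score, reverse=True)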
""" print("\n") print("╔" + "=" * 78 + "╗") print("║" + " " * 78 + "║") print("║" + " Usage Frequency Tracking - End-to-End Example".center(78) + "║") print("║" + " " * 78 + "║") print("╚" + "=" * 78 + "╝") print("\n") # Configuration check print("Configuration:") print(f" Graph Provider: {os.getenv('GRAPH_DATABASE_PROVIDER')}") print(f" Graph Handler: {os.getenv('GRAPH_DATASET_HANDLER')}") print(f" LLM Provider: {os.getenv('LLM_PROVIDER')}") # Verify LLM key is set if not os.getenv("LLM_API_KEY") or os.getenv("LLM_API_KEY") == "sk-your-key-here": print("\n⚠ WARNING: LLM_API_KEY not set in .env file") print(" Set your API key to run searches") return print("\n") try: # Step 1: Setup await setup_knowledge_base() # Step 2: Simulate searches # Note: Repeat queries increase frequency for those topics queries = [ "What is machine learning?", "Explain neural networks", "How does deep learning work?", "Tell me about neural networks", # Repeat - increases frequency "What are transformers in NLP?", "Explain neural networks again", # Another repeat "How does computer vision work?", "What is reinforcement learning?", "Tell me more about neural networks", # Third repeat ] successful_searches = await simulate_user_searches(queries) if successful_searches == 0: print("⚠ No searches completed - cannot demonstrate frequency tracking") return # Step 3: Extract frequencies stats = await extract_and_apply_frequencies(time_window_days=7, min_threshold=1) # Step 4: Analyze results await analyze_results(stats) # Step 5: Show usage examples await demonstrate_retrieval_usage() # Summary print("\n") print("╔" + "=" * 78 + "╗") print("║" + " " * 78 + "║") print("║" + " Example Complete!".center(78) + "║") print("║" + " " * 78 + "║") print("╚" + "=" * 78 + "╝") print("\n") print("Summary:") print(" ✓ Documents added: 4") print(f" ✓ Searches performed: {successful_searches}") print(f" ✓ Interactions tracked: {stats['interactions_in_window']}") print(f" ✓ Nodes weighted: {len(stats['node_frequencies'])}") print("\nNext steps:") print(" 1. Open Neo4j Browser (http://localhost:7474) to explore the graph") print(" 2. Modify retrieval strategies to use frequency_weight") print(" 3. Build analytics dashboards using element_type_frequencies") print(" 4. Run periodic frequency updates to track trends over time") print("\n") except Exception as e: print(f"\n✗ Example failed: {e}") import traceback traceback.print_exc() if __name__ == "__main__": asyncio.run(main())