adds pipeline frame to the poc

2025-08-03 11:51:08 +02:00 · 2025-08-03 11:51:08 +02:00 · 023f98b33e
commit 023f98b33e
parent df17ae7720
3 changed files with 222 additions and 0 deletions
--- a/cognee/triplet_embedding_poc/init.py
+++ b/cognee/triplet_embedding_poc/init.py
--- a/cognee/triplet_embedding_poc/triplet_embedding_poc_example.py
+++ b/cognee/triplet_embedding_poc/triplet_embedding_poc_example.py
@ -0,0 +1,188 @@
 import asyncio
 import cognee
 from cognee.shared.logging_utils import setup_logging, INFO
 from cognee.triplet_embedding_poc.triplet_embedding_postprocessing import (
    triplet_embedding_postprocessing,
 )
 # Sample CV text (its first line labels it "CV 1: Relevant"); one of the five
 # example documents main() passes to cognee.add() when pre_graph_creation is set.
 job_1 = """
 CV 1: Relevant
 Name: Dr. Emily Carter
 Contact Information:
 Email: emily.carter@example.com
 Phone: (555) 123-4567
 Summary:
 Senior Data Scientist with over 8 years of experience in machine learning and predictive analytics. Expertise in developing advanced algorithms and deploying scalable models in production environments.
 Education:
 Ph.D. in Computer Science, Stanford University (2014)
 B.S. in Mathematics, University of California, Berkeley (2010)
 Experience:
 Senior Data Scientist, InnovateAI Labs (2016 – Present)
 Led a team in developing machine learning models for natural language processing applications.
 Implemented deep learning algorithms that improved prediction accuracy by 25%.
 Collaborated with cross-functional teams to integrate models into cloud-based platforms.
 Data Scientist, DataWave Analytics (2014 – 2016)
 Developed predictive models for customer segmentation and churn analysis.
 Analyzed large datasets using Hadoop and Spark frameworks.
 Skills:
 Programming Languages: Python, R, SQL
 Machine Learning: TensorFlow, Keras, Scikit-Learn
 Big Data Technologies: Hadoop, Spark
 Data Visualization: Tableau, Matplotlib
 """
 # Sample CV text (its first line labels it "CV 2: Relevant"); one of the five
 # example documents main() passes to cognee.add() when pre_graph_creation is set.
 job_2 = """
 CV 2: Relevant
 Name: Michael Rodriguez
 Contact Information:
 Email: michael.rodriguez@example.com
 Phone: (555) 234-5678
 Summary:
 Data Scientist with a strong background in machine learning and statistical modeling. Skilled in handling large datasets and translating data into actionable business insights.
 Education:
 M.S. in Data Science, Carnegie Mellon University (2013)
 B.S. in Computer Science, University of Michigan (2011)
 Experience:
 Senior Data Scientist, Alpha Analytics (2017 – Present)
 Developed machine learning models to optimize marketing strategies.
 Reduced customer acquisition cost by 15% through predictive modeling.
 Data Scientist, TechInsights (2013 – 2017)
 Analyzed user behavior data to improve product features.
 Implemented A/B testing frameworks to evaluate product changes.
 Skills:
 Programming Languages: Python, Java, SQL
 Machine Learning: Scikit-Learn, XGBoost
 Data Visualization: Seaborn, Plotly
 Databases: MySQL, MongoDB
 """
 # Sample CV text (its first line labels it "CV 3: Relevant"); one of the five
 # example documents main() passes to cognee.add() when pre_graph_creation is set.
 job_3 = """
 CV 3: Relevant
 Name: Sarah Nguyen
 Contact Information:
 Email: sarah.nguyen@example.com
 Phone: (555) 345-6789
 Summary:
 Data Scientist specializing in machine learning with 6 years of experience. Passionate about leveraging data to drive business solutions and improve product performance.
 Education:
 M.S. in Statistics, University of Washington (2014)
 B.S. in Applied Mathematics, University of Texas at Austin (2012)
 Experience:
 Data Scientist, QuantumTech (2016 – Present)
 Designed and implemented machine learning algorithms for financial forecasting.
 Improved model efficiency by 20% through algorithm optimization.
 Junior Data Scientist, DataCore Solutions (2014 – 2016)
 Assisted in developing predictive models for supply chain optimization.
 Conducted data cleaning and preprocessing on large datasets.
 Skills:
 Programming Languages: Python, R
 Machine Learning Frameworks: PyTorch, Scikit-Learn
 Statistical Analysis: SAS, SPSS
 Cloud Platforms: AWS, Azure
 """
 # Sample CV text (its first line labels it "CV 4: Not Relevant"); one of the five
 # example documents main() passes to cognee.add() when pre_graph_creation is set.
 job_4 = """
 CV 4: Not Relevant
 Name: David Thompson
 Contact Information:
 Email: david.thompson@example.com
 Phone: (555) 456-7890
 Summary:
 Creative Graphic Designer with over 8 years of experience in visual design and branding. Proficient in Adobe Creative Suite and passionate about creating compelling visuals.
 Education:
 B.F.A. in Graphic Design, Rhode Island School of Design (2012)
 Experience:
 Senior Graphic Designer, CreativeWorks Agency (2015 – Present)
 Led design projects for clients in various industries.
 Created branding materials that increased client engagement by 30%.
 Graphic Designer, Visual Innovations (2012 – 2015)
 Designed marketing collateral, including brochures, logos, and websites.
 Collaborated with the marketing team to develop cohesive brand strategies.
 Skills:
 Design Software: Adobe Photoshop, Illustrator, InDesign
 Web Design: HTML, CSS
 Specialties: Branding and Identity, Typography
 """
 # Sample CV text (its first line labels it "CV 5: Not Relevant"); one of the five
 # example documents main() passes to cognee.add() when pre_graph_creation is set.
 job_5 = """
 CV 5: Not Relevant
 Name: Jessica Miller
 Contact Information:
 Email: jessica.miller@example.com
 Phone: (555) 567-8901
 Summary:
 Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams. Excellent communication and leadership skills.
 Education:
 B.A. in Business Administration, University of Southern California (2010)
 Experience:
 Sales Manager, Global Enterprises (2015 – Present)
 Managed a sales team of 15 members, achieving a 20% increase in annual revenue.
 Developed sales strategies that expanded customer base by 25%.
 Sales Representative, Market Leaders Inc. (2010 – 2015)
 Consistently exceeded sales targets and received the 'Top Salesperson' award in 2013.
 Skills:
 Sales Strategy and Planning
 Team Leadership and Development
 CRM Software: Salesforce, Zoho
 Negotiation and Relationship Building
 """
 async def main(*, pre_graph_creation: bool = False):
    """Run the triplet-embedding PoC, optionally ingesting the sample CVs first.

    Args:
        pre_graph_creation: When true, prune cognee data and system metadata,
            add the five sample CV texts (``job_1`` .. ``job_5``) and run
            ``cognee.cognify()`` before post-processing. The default of
            ``False`` skips that step and only runs the post-processing
            pipeline, matching the value that used to be hard-coded here.
    """
    if pre_graph_creation:
        # Start from a clean slate before ingesting the sample documents.
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
        for text in (job_1, job_2, job_3, job_4, job_5):
            await cognee.add(text)
            print(f"Added text: {text[:35]}...")
        await cognee.cognify()
    await triplet_embedding_postprocessing()
 if __name__ == "__main__":
    logger = setup_logging(log_level=INFO)
    # asyncio.run() creates a fresh event loop, runs main() to completion,
    # shuts down async generators and then closes the loop. The previous
    # hand-rolled new_event_loop()/run_until_complete() sequence never
    # closed the loop it created.
    asyncio.run(main())
--- a/cognee/triplet_embedding_poc/triplet_embedding_postprocessing.py
+++ b/cognee/triplet_embedding_poc/triplet_embedding_postprocessing.py
@ -0,0 +1,34 @@
 from typing import Any
 from cognee.modules.pipelines.operations.run_tasks_base import run_tasks_base
 from cognee.modules.users.methods import get_default_user
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.pipelines.tasks.task import Task
 # Module-level logger obtained under the name "triplet_embedding_poc".
 logger = get_logger("triplet_embedding_poc")
 async def get_triplets_from_graph_store(data) -> Any:
    """Placeholder triplet source: asynchronously yield the integers 0 to 4.

    ``data`` is accepted but not read by this stub.
    """
    index = 0
    while index < 5:
        yield index
        index += 1
 async def add_triplets_to_collection(data) -> None:
    """Placeholder sink: print the received ``data`` to standard output.

    Nothing is stored anywhere by this stub; it only echoes its argument.
    """
    print(data)
 async def get_triplet_embedding_tasks() -> list[Task]:
    """Build the ordered task list for the triplet-embedding pipeline.

    Returns:
        Two ``Task`` objects wrapping ``get_triplets_from_graph_store`` and
        ``add_triplets_to_collection``, in that order.
    """
    pipeline_steps = (get_triplets_from_graph_store, add_triplets_to_collection)
    return [Task(step) for step in pipeline_steps]
 async def triplet_embedding_postprocessing():
    """Run the triplet-embedding tasks for the default user and print each result.

    The task list comes from ``get_triplet_embedding_tasks()``; it is handed to
    ``run_tasks_base`` together with the default user and an empty ``data``
    list, and every item the pipeline yields is printed.
    """
    pipeline_tasks = await get_triplet_embedding_tasks()
    default_user = await get_default_user()
    pipeline_run = run_tasks_base(pipeline_tasks, user=default_user, data=[])
    async for outcome in pipeline_run:
        print(outcome)