diff --git a/cognee/triplet_embedding_poc/__init__.py b/cognee/triplet_embedding_poc/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/cognee/triplet_embedding_poc/triplet_embedding_poc_example.py b/cognee/triplet_embedding_poc/triplet_embedding_poc_example.py
new file mode 100644
index 000000000..c2e889101
--- /dev/null
+++ b/cognee/triplet_embedding_poc/triplet_embedding_poc_example.py
@@ -0,0 +1,194 @@
+"""Run the triplet-embedding proof of concept on five sample CVs."""
+
+import asyncio
+import cognee
+from cognee.shared.logging_utils import setup_logging, INFO
+from cognee.triplet_embedding_poc.triplet_embedding_postprocessing import (
+    triplet_embedding_postprocessing,
+)
+
+job_1 = """
+CV 1: Relevant
+Name: Dr. Emily Carter
+Contact Information:
+
+Email: emily.carter@example.com
+Phone: (555) 123-4567
+Summary:
+
+Senior Data Scientist with over 8 years of experience in machine learning and predictive analytics. Expertise in developing advanced algorithms and deploying scalable models in production environments.
+
+Education:
+
+Ph.D. in Computer Science, Stanford University (2014)
+B.S. in Mathematics, University of California, Berkeley (2010)
+Experience:
+
+Senior Data Scientist, InnovateAI Labs (2016 – Present)
+Led a team in developing machine learning models for natural language processing applications.
+Implemented deep learning algorithms that improved prediction accuracy by 25%.
+Collaborated with cross-functional teams to integrate models into cloud-based platforms.
+Data Scientist, DataWave Analytics (2014 – 2016)
+Developed predictive models for customer segmentation and churn analysis.
+Analyzed large datasets using Hadoop and Spark frameworks.
+Skills:
+
+Programming Languages: Python, R, SQL
+Machine Learning: TensorFlow, Keras, Scikit-Learn
+Big Data Technologies: Hadoop, Spark
+Data Visualization: Tableau, Matplotlib
+"""
+
+job_2 = """
+CV 2: Relevant
+Name: Michael Rodriguez
+Contact Information:
+
+Email: michael.rodriguez@example.com
+Phone: (555) 234-5678
+Summary:
+
+Data Scientist with a strong background in machine learning and statistical modeling. Skilled in handling large datasets and translating data into actionable business insights.
+
+Education:
+
+M.S. in Data Science, Carnegie Mellon University (2013)
+B.S. in Computer Science, University of Michigan (2011)
+Experience:
+
+Senior Data Scientist, Alpha Analytics (2017 – Present)
+Developed machine learning models to optimize marketing strategies.
+Reduced customer acquisition cost by 15% through predictive modeling.
+Data Scientist, TechInsights (2013 – 2017)
+Analyzed user behavior data to improve product features.
+Implemented A/B testing frameworks to evaluate product changes.
+Skills:
+
+Programming Languages: Python, Java, SQL
+Machine Learning: Scikit-Learn, XGBoost
+Data Visualization: Seaborn, Plotly
+Databases: MySQL, MongoDB
+"""
+
+
+job_3 = """
+CV 3: Relevant
+Name: Sarah Nguyen
+Contact Information:
+
+Email: sarah.nguyen@example.com
+Phone: (555) 345-6789
+Summary:
+
+Data Scientist specializing in machine learning with 6 years of experience. Passionate about leveraging data to drive business solutions and improve product performance.
+
+Education:
+
+M.S. in Statistics, University of Washington (2014)
+B.S. in Applied Mathematics, University of Texas at Austin (2012)
+Experience:
+
+Data Scientist, QuantumTech (2016 – Present)
+Designed and implemented machine learning algorithms for financial forecasting.
+Improved model efficiency by 20% through algorithm optimization.
+Junior Data Scientist, DataCore Solutions (2014 – 2016)
+Assisted in developing predictive models for supply chain optimization.
+Conducted data cleaning and preprocessing on large datasets.
+Skills:
+
+Programming Languages: Python, R
+Machine Learning Frameworks: PyTorch, Scikit-Learn
+Statistical Analysis: SAS, SPSS
+Cloud Platforms: AWS, Azure
+"""
+
+
+job_4 = """
+CV 4: Not Relevant
+Name: David Thompson
+Contact Information:
+
+Email: david.thompson@example.com
+Phone: (555) 456-7890
+Summary:
+
+Creative Graphic Designer with over 8 years of experience in visual design and branding. Proficient in Adobe Creative Suite and passionate about creating compelling visuals.
+
+Education:
+
+B.F.A. in Graphic Design, Rhode Island School of Design (2012)
+Experience:
+
+Senior Graphic Designer, CreativeWorks Agency (2015 – Present)
+Led design projects for clients in various industries.
+Created branding materials that increased client engagement by 30%.
+Graphic Designer, Visual Innovations (2012 – 2015)
+Designed marketing collateral, including brochures, logos, and websites.
+Collaborated with the marketing team to develop cohesive brand strategies.
+Skills:
+
+Design Software: Adobe Photoshop, Illustrator, InDesign
+Web Design: HTML, CSS
+Specialties: Branding and Identity, Typography
+"""
+
+
+job_5 = """
+CV 5: Not Relevant
+Name: Jessica Miller
+Contact Information:
+
+Email: jessica.miller@example.com
+Phone: (555) 567-8901
+Summary:
+
+Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams. Excellent communication and leadership skills.
+
+Education:
+
+B.A. in Business Administration, University of Southern California (2010)
+Experience:
+
+Sales Manager, Global Enterprises (2015 – Present)
+Managed a sales team of 15 members, achieving a 20% increase in annual revenue.
+Developed sales strategies that expanded customer base by 25%.
+Sales Representative, Market Leaders Inc. (2010 – 2015)
+Consistently exceeded sales targets and received the 'Top Salesperson' award in 2013.
+Skills:
+
+Sales Strategy and Planning
+Team Leadership and Development
+CRM Software: Salesforce, Zoho
+Negotiation and Relationship Building
+"""
+
+
+async def main(*, pre_graph_creation: bool = False) -> None:
+    """Run the triplet-embedding POC, optionally building the graph first.
+
+    Args:
+        pre_graph_creation: When true, call ``cognee.prune.prune_data()`` and
+            ``cognee.prune.prune_system(metadata=True)``, add the five sample
+            CVs and run ``cognee.cognify()`` before the post-processing step.
+            Defaults to False, which runs the post-processing step only.
+    """
+    # NOTE(review): assumes the add/cognify steps belong inside this branch
+    # (the reviewed diff had lost its indentation) -- confirm before merging.
+    if pre_graph_creation:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+
+        for text in (job_1, job_2, job_3, job_4, job_5):
+            await cognee.add(text)
+            print(f"Added text: {text[:35]}...")
+        await cognee.cognify()
+
+    await triplet_embedding_postprocessing()
+
+
+if __name__ == "__main__":
+    logger = setup_logging(log_level=INFO)
+
+    # asyncio.run() creates the loop, shuts down async generators and closes
+    # the loop on exit; the manually created loop it replaces was never closed.
+    asyncio.run(main())
diff --git a/cognee/triplet_embedding_poc/triplet_embedding_postprocessing.py b/cognee/triplet_embedding_poc/triplet_embedding_postprocessing.py
new file mode 100644
index 000000000..a9611cd91
--- /dev/null
+++ b/cognee/triplet_embedding_poc/triplet_embedding_postprocessing.py
@@ -0,0 +1,49 @@
+"""Placeholder pipeline tasks for the triplet-embedding proof of concept."""
+
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from cognee.modules.pipelines.operations.run_tasks_base import run_tasks_base
+from cognee.modules.users.methods import get_default_user
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.pipelines.tasks.task import Task
+
+
+logger = get_logger("triplet_embedding_poc")
+
+
+async def get_triplets_from_graph_store(data: Any) -> AsyncGenerator[int, None]:
+    """Yield the integers 0 through 4 as placeholder values.
+
+    Args:
+        data: Pipeline input; not used by this placeholder.
+    """
+    for i in range(5):
+        yield i
+
+
+async def add_triplets_to_collection(data: Any) -> None:
+    """Print ``data``; nothing is stored anywhere yet.
+
+    Args:
+        data: Value to print (presumably one item produced by the previous
+            task -- confirm against ``run_tasks_base``).
+    """
+    print(data)
+
+
+async def get_triplet_embedding_tasks() -> list[Task]:
+    """Return the two POC tasks in list order: fetch triplets, then add them."""
+    return [
+        Task(get_triplets_from_graph_store),
+        Task(add_triplets_to_collection),
+    ]
+
+
+async def triplet_embedding_postprocessing() -> None:
+    """Run the POC tasks as the default user and print each yielded result."""
+    tasks = await get_triplet_embedding_tasks()
+    user = await get_default_user()
+
+    async for result in run_tasks_base(tasks, user=user, data=[]):
+        print(result)