adds pipeline frame to the poc
This commit is contained in:
parent
df17ae7720
commit
023f98b33e
3 changed files with 222 additions and 0 deletions
0
cognee/triplet_embedding_poc/__init__.py
Normal file
0
cognee/triplet_embedding_poc/__init__.py
Normal file
188
cognee/triplet_embedding_poc/triplet_embedding_poc_example.py
Normal file
188
cognee/triplet_embedding_poc/triplet_embedding_poc_example.py
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
import asyncio
|
||||
import cognee
|
||||
from cognee.shared.logging_utils import setup_logging, INFO
|
||||
from cognee.triplet_embedding_poc.triplet_embedding_postprocessing import (
|
||||
triplet_embedding_postprocessing,
|
||||
)
|
||||
|
||||
# Sample CV text (its own header labels it "Relevant"); main() hands it to
# cognee.add() as one input document.
job_1 = """
CV 1: Relevant
Name: Dr. Emily Carter
Contact Information:

Email: emily.carter@example.com
Phone: (555) 123-4567
Summary:

Senior Data Scientist with over 8 years of experience in machine learning and predictive analytics. Expertise in developing advanced algorithms and deploying scalable models in production environments.

Education:

Ph.D. in Computer Science, Stanford University (2014)
B.S. in Mathematics, University of California, Berkeley (2010)
Experience:

Senior Data Scientist, InnovateAI Labs (2016 – Present)
Led a team in developing machine learning models for natural language processing applications.
Implemented deep learning algorithms that improved prediction accuracy by 25%.
Collaborated with cross-functional teams to integrate models into cloud-based platforms.
Data Scientist, DataWave Analytics (2014 – 2016)
Developed predictive models for customer segmentation and churn analysis.
Analyzed large datasets using Hadoop and Spark frameworks.
Skills:

Programming Languages: Python, R, SQL
Machine Learning: TensorFlow, Keras, Scikit-Learn
Big Data Technologies: Hadoop, Spark
Data Visualization: Tableau, Matplotlib
"""
|
||||
|
||||
# Sample CV text (its own header labels it "Relevant"); main() hands it to
# cognee.add() as one input document.
job_2 = """
CV 2: Relevant
Name: Michael Rodriguez
Contact Information:

Email: michael.rodriguez@example.com
Phone: (555) 234-5678
Summary:

Data Scientist with a strong background in machine learning and statistical modeling. Skilled in handling large datasets and translating data into actionable business insights.

Education:

M.S. in Data Science, Carnegie Mellon University (2013)
B.S. in Computer Science, University of Michigan (2011)
Experience:

Senior Data Scientist, Alpha Analytics (2017 – Present)
Developed machine learning models to optimize marketing strategies.
Reduced customer acquisition cost by 15% through predictive modeling.
Data Scientist, TechInsights (2013 – 2017)
Analyzed user behavior data to improve product features.
Implemented A/B testing frameworks to evaluate product changes.
Skills:

Programming Languages: Python, Java, SQL
Machine Learning: Scikit-Learn, XGBoost
Data Visualization: Seaborn, Plotly
Databases: MySQL, MongoDB
"""
|
||||
|
||||
|
||||
# Sample CV text (its own header labels it "Relevant"); main() hands it to
# cognee.add() as one input document.
job_3 = """
CV 3: Relevant
Name: Sarah Nguyen
Contact Information:

Email: sarah.nguyen@example.com
Phone: (555) 345-6789
Summary:

Data Scientist specializing in machine learning with 6 years of experience. Passionate about leveraging data to drive business solutions and improve product performance.

Education:

M.S. in Statistics, University of Washington (2014)
B.S. in Applied Mathematics, University of Texas at Austin (2012)
Experience:

Data Scientist, QuantumTech (2016 – Present)
Designed and implemented machine learning algorithms for financial forecasting.
Improved model efficiency by 20% through algorithm optimization.
Junior Data Scientist, DataCore Solutions (2014 – 2016)
Assisted in developing predictive models for supply chain optimization.
Conducted data cleaning and preprocessing on large datasets.
Skills:

Programming Languages: Python, R
Machine Learning Frameworks: PyTorch, Scikit-Learn
Statistical Analysis: SAS, SPSS
Cloud Platforms: AWS, Azure
"""
|
||||
|
||||
|
||||
# Sample CV text (its own header labels it "Not Relevant"); main() hands it to
# cognee.add() as one input document.
job_4 = """
CV 4: Not Relevant
Name: David Thompson
Contact Information:

Email: david.thompson@example.com
Phone: (555) 456-7890
Summary:

Creative Graphic Designer with over 8 years of experience in visual design and branding. Proficient in Adobe Creative Suite and passionate about creating compelling visuals.

Education:

B.F.A. in Graphic Design, Rhode Island School of Design (2012)
Experience:

Senior Graphic Designer, CreativeWorks Agency (2015 – Present)
Led design projects for clients in various industries.
Created branding materials that increased client engagement by 30%.
Graphic Designer, Visual Innovations (2012 – 2015)
Designed marketing collateral, including brochures, logos, and websites.
Collaborated with the marketing team to develop cohesive brand strategies.
Skills:

Design Software: Adobe Photoshop, Illustrator, InDesign
Web Design: HTML, CSS
Specialties: Branding and Identity, Typography
"""
|
||||
|
||||
|
||||
# Sample CV text (its own header labels it "Not Relevant"); main() hands it to
# cognee.add() as one input document.
job_5 = """
CV 5: Not Relevant
Name: Jessica Miller
Contact Information:

Email: jessica.miller@example.com
Phone: (555) 567-8901
Summary:

Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams. Excellent communication and leadership skills.

Education:

B.A. in Business Administration, University of Southern California (2010)
Experience:

Sales Manager, Global Enterprises (2015 – Present)
Managed a sales team of 15 members, achieving a 20% increase in annual revenue.
Developed sales strategies that expanded customer base by 25%.
Sales Representative, Market Leaders Inc. (2010 – 2015)
Consistently exceeded sales targets and received the 'Top Salesperson' award in 2013.
Skills:

Sales Strategy and Planning
Team Leadership and Development
CRM Software: Salesforce, Zoho
Negotiation and Relationship Building
"""
|
||||
|
||||
|
||||
async def main(*, pre_graph_creation: bool = False):
    """Run the triplet-embedding proof of concept.

    Args:
        pre_graph_creation: When true, first call ``cognee.prune.prune_data()``
            and ``cognee.prune.prune_system(metadata=True)``, add the five
            sample CVs with ``cognee.add`` and run ``cognee.cognify()``.
            Defaults to ``False``, which goes straight to
            ``triplet_embedding_postprocessing()``.
    """
    # NOTE(review): the add/cognify steps are assumed to belong to the
    # pre_graph_creation branch together with the prune calls - confirm.
    if pre_graph_creation:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)

        for text in (job_1, job_2, job_3, job_4, job_5):
            await cognee.add(text)
            # Echo only the first 35 characters so the console stays readable.
            print(f"Added text: {text[:35]}...")
        await cognee.cognify()

    await triplet_embedding_postprocessing()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: configure logging, then drive main() on a dedicated
    # event loop.
    logger = setup_logging(log_level=INFO)

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        try:
            # Finalize async generators that are still open on this loop.
            loop.run_until_complete(loop.shutdown_asyncgens())
        finally:
            # Close the loop even if generator shutdown fails, so the loop
            # created above is not left open.
            loop.close()
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
from typing import Any
|
||||
|
||||
from cognee.modules.pipelines.operations.run_tasks_base import run_tasks_base
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.pipelines.tasks.task import Task
|
||||
|
||||
|
||||
logger = get_logger("triplet_embedding_poc")
|
||||
|
||||
|
||||
async def get_triplets_from_graph_store(data) -> Any:
    """Placeholder pipeline stage: yield the integers 0 through 4.

    ``data`` is accepted but not read by this stub.
    """
    for placeholder in range(5):
        yield placeholder
|
||||
|
||||
|
||||
async def add_triplets_to_collection(data) -> None:
    """Placeholder pipeline stage: print ``data`` to standard output."""
    print(data)
|
||||
|
||||
|
||||
async def get_triplet_embedding_tasks() -> list[Task]:
    """Return the pipeline stages, in order, each wrapped in a ``Task``.

    The first stage is ``get_triplets_from_graph_store`` and the second is
    ``add_triplets_to_collection``.
    """
    stages = (get_triplets_from_graph_store, add_triplets_to_collection)
    return [Task(stage) for stage in stages]
|
||||
|
||||
|
||||
async def triplet_embedding_postprocessing():
    """Run the triplet-embedding tasks and print every result they produce.

    The tasks come from ``get_triplet_embedding_tasks()`` and are run through
    ``run_tasks_base`` as the default user with an empty ``data`` list.
    """
    pipeline = await get_triplet_embedding_tasks()
    default_user = await get_default_user()

    async for outcome in run_tasks_base(pipeline, user=default_user, data=[]):
        print(outcome)
|
||||
Loading…
Add table
Reference in a new issue