First working raw crewai demo prototype

This commit is contained in:
hajdul88 2025-05-15 14:41:46 +02:00
parent f825732eb2
commit ce14a441af
9 changed files with 42 additions and 47 deletions

View file

@@ -4,9 +4,9 @@ soft_skills_assessment_task:
Evaluate the comments based on the communications clarity, community engagement, and general kindness. Evaluate the comments based on the communications clarity, community engagement, and general kindness.
Please make the output in a way that it expresses the soft skills of the person. Please make the output in a way that it expresses the soft skills of the person.
The people to evaluate are: The people to evaluate are:
-Dean P. -lxobr
-Thomas M. -hajdul88
You can search them by asking comments by 'person', however you are allowed You can search them by asking comments authored by the 'person', however you are allowed
and encouraged to ask multiple questions. and encouraged to ask multiple questions.
expected_output: > expected_output: >
results strictly containing: results strictly containing:
@@ -24,9 +24,9 @@ technical_assessment_task:
along with narrative comments detailing your findings. Use strictly the technical input along with narrative comments detailing your findings. Use strictly the technical input
when you are scoring the candidate, you are not allowed to make any comment about soft skills. when you are scoring the candidate, you are not allowed to make any comment about soft skills.
The people to evaluate are: The people to evaluate are:
-Dean P. -lxobr
-Thomas M. -hajdul88
You can search them by code written by 'person', however you are allowed You can search them by code changed by the 'person', however you are allowed
and encouraged to ask multiple questions. and encouraged to ask multiple questions.
expected_output: > expected_output: >
results strictly containing: results strictly containing:
@@ -42,8 +42,8 @@ hiring_decision_task:
Review the technical_assessment_task and soft_skills_assessment_task outputs, Review the technical_assessment_task and soft_skills_assessment_task outputs,
then decide HIRE or NO_HIRE for each candidate with a detailed reasoning. then decide HIRE or NO_HIRE for each candidate with a detailed reasoning.
The people to evaluate are: The people to evaluate are:
-Dean P. -lxobr
-Thomas M. -hajdul88
We have to hire one of them. We have to hire one of them.
expected_output: > expected_output: >
A string strictly containing the following for each person: A string strictly containing the following for each person:

View file

@@ -3,6 +3,7 @@ from typing import Type, List, Optional
from pydantic import BaseModel, Field, PrivateAttr from pydantic import BaseModel, Field, PrivateAttr
from cognee.api.v1.search import SearchType from cognee.api.v1.search import SearchType
from cognee.modules.engine.models import NodeSet from cognee.modules.engine.models import NodeSet
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from cognee.modules.search.methods import search from cognee.modules.search.methods import search
from cognee.modules.users.methods import get_default_user from cognee.modules.users.methods import get_default_user
@@ -28,12 +29,13 @@ class CogneeSearch(BaseTool):
async def main(): async def main():
try: try:
print(kwargs.get("query")) print(kwargs.get("query"))
search_results = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION, search_results = await GraphCompletionRetriever(
query_text=kwargs.get("query"), top_k=10,
node_type=NodeSet, node_type=NodeSet,
node_name=self._nodeset_name, node_name=self._nodeset_name,
) ).get_context(query=kwargs.get("query"))
return search_results return search_results
except Exception as e: except Exception as e:
return f"Error: {str(e)}" return f"Error: {str(e)}"

View file

@@ -1,4 +1,7 @@
from crewai.tools import BaseTool from crewai.tools import BaseTool
from cognee.modules.engine.models import NodeSet
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from ..github_ingest_datapoints import cognify_github_data_from_username from ..github_ingest_datapoints import cognify_github_data_from_username
@@ -21,6 +24,7 @@ class GithubIngestion(BaseTool):
await cognify_github_data_from_username(applicant_1, token) await cognify_github_data_from_username(applicant_1, token)
await cognify_github_data_from_username(applicant_2, token) await cognify_github_data_from_username(applicant_2, token)
return "Github ingestion finished" return "Github ingestion finished"
except Exception as e: except Exception as e:
return f"Error: {str(e)}" return f"Error: {str(e)}"

View file

@@ -26,8 +26,7 @@ def create_github_user_datapoint(user_data, nodesets: List[NodeSet]):
user = GitHubUser( user = GitHubUser(
id=user_id, id=user_id,
login=user_data.get("login", ""), name=user_data.get("login", ""),
name=user_data.get("name"),
bio=user_data.get("bio"), bio=user_data.get("bio"),
company=user_data.get("company"), company=user_data.get("company"),
location=user_data.get("location"), location=user_data.get("location"),
@@ -74,7 +73,7 @@ def create_commit_datapoint(
commit = Commit( commit = Commit(
id=commit_id, id=commit_id,
commit_sha=commit_data.get("commit_sha", ""), commit_sha=commit_data.get("commit_sha", ""),
commit_message=commit_data.get("commit_message", ""), text="Commit message:" + (str)(commit_data.get("commit_message", "")),
commit_date=commit_data.get("commit_date", ""), commit_date=commit_data.get("commit_date", ""),
commit_url=commit_data.get("commit_url", ""), commit_url=commit_data.get("commit_url", ""),
author_name=commit_data.get("login", ""), author_name=commit_data.get("login", ""),
@@ -102,7 +101,7 @@ def create_file_change_datapoint(
additions=fc_data.get("additions", 0), additions=fc_data.get("additions", 0),
deletions=fc_data.get("deletions", 0), deletions=fc_data.get("deletions", 0),
changes=fc_data.get("changes", 0), changes=fc_data.get("changes", 0),
diff=fc_data.get("diff", ""), text=fc_data.get("diff", ""),
commit_sha=fc_data.get("commit_sha", ""), commit_sha=fc_data.get("commit_sha", ""),
repo=fc_data.get("repo", ""), repo=fc_data.get("repo", ""),
modifies=file.filename, modifies=file.filename,
@@ -123,7 +122,7 @@ def create_issue_datapoint(
issue = Issue( issue = Issue(
id=issue_id, id=issue_id,
number=issue_data.get("issue_number", 0), number=issue_data.get("issue_number", 0),
title=issue_data.get("issue_title", ""), text=issue_data.get("issue_title", ""),
state=issue_data.get("issue_state", ""), state=issue_data.get("issue_state", ""),
repository=repo_name, repository=repo_name,
is_pr=False, is_pr=False,
@@ -144,7 +143,7 @@ def create_comment_datapoint(
comment = Comment( comment = Comment(
id=comment_id, id=comment_id,
comment_id=str(comment_data.get("comment_id", "")), comment_id=str(comment_data.get("comment_id", "")),
body=comment_data.get("body", ""), text=comment_data.get("body", ""),
created_at=comment_data.get("created_at", ""), created_at=comment_data.get("created_at", ""),
updated_at=comment_data.get("updated_at", ""), updated_at=comment_data.get("updated_at", ""),
author_name=comment_data.get("login", ""), author_name=comment_data.get("login", ""),

View file

@@ -1,7 +1,6 @@
from uuid import uuid5, NAMESPACE_OID from uuid import uuid5, NAMESPACE_OID
from typing import Optional, List from typing import Optional, List
from cognee.low_level import DataPoint from cognee.infrastructure.engine import DataPoint
from cognee.modules.engine.models.node_set import NodeSet
class File(DataPoint): class File(DataPoint):
@@ -13,7 +12,6 @@ class File(DataPoint):
class GitHubUser(DataPoint): class GitHubUser(DataPoint):
login: str
name: Optional[str] name: Optional[str]
bio: Optional[str] bio: Optional[str]
company: Optional[str] company: Optional[str]
@@ -22,7 +20,7 @@ class GitHubUser(DataPoint):
followers: int followers: int
following: int following: int
interacts_with: List["Repository"] = [] interacts_with: List["Repository"] = []
metadata: dict = {"index_fields": ["login"]} metadata: dict = {"index_fields": ["name"]}
class FileChange(DataPoint): class FileChange(DataPoint):
@@ -31,49 +29,46 @@ class FileChange(DataPoint):
additions: int additions: int
deletions: int deletions: int
changes: int changes: int
diff: str text: str
commit_sha: str commit_sha: str
repo: str repo: str
modifies: str modifies: str
changed_by: GitHubUser changed_by: GitHubUser
metadata: dict = {"index_fields": ["diff"]} metadata: dict = {"index_fields": ["text"]}
class Comment(DataPoint): class Comment(DataPoint):
comment_id: str comment_id: str
body: str text: str
created_at: str created_at: str
updated_at: str updated_at: str
author_name: str author_name: str
issue_number: int issue_number: int
repo: str repo: str
authored_by: GitHubUser authored_by: GitHubUser
metadata: dict = {"index_fields": ["body"]} metadata: dict = {"index_fields": ["text"]}
class Issue(DataPoint): class Issue(DataPoint):
number: int number: int
title: str text: str
state: str state: str
repository: str repository: str
is_pr: bool is_pr: bool
has_comment: List[Comment] = [] has_comment: List[Comment] = []
metadata: dict = {"index_fields": ["title"]}
class Commit(DataPoint): class Commit(DataPoint):
commit_sha: str commit_sha: str
commit_message: str text: str
commit_date: str commit_date: str
commit_url: str commit_url: str
author_name: str author_name: str
repo: str repo: str
has_change: List[FileChange] = [] has_change: List[FileChange] = []
metadata: dict = {"index_fields": ["commit_message"]}
class Repository(DataPoint): class Repository(DataPoint):
name: str name: str
has_issue: List[Issue] = [] has_issue: List[Issue] = []
has_commit: List[Commit] = [] has_commit: List[Commit] = []
metadata: dict = {"index_fields": ["name"]}

View file

@@ -3,15 +3,15 @@ import asyncio
from uuid import uuid5, NAMESPACE_OID from uuid import uuid5, NAMESPACE_OID
from typing import Optional, List, Dict, Any from typing import Optional, List, Dict, Any
from pathlib import Path from pathlib import Path
from cognee.api.v1.search import SearchType
import cognee import cognee
from cognee.low_level import DataPoint, setup as cognee_setup from cognee.low_level import DataPoint, setup as cognee_setup
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from cognee.tasks.storage import add_data_points from cognee.tasks.storage import add_data_points
from cognee.modules.pipelines.tasks.task import Task from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.pipelines import run_tasks from cognee.modules.pipelines import run_tasks
from cognee.modules.users.methods import get_default_user from cognee.modules.users.methods import get_default_user
from cognee.modules.engine.models.node_set import NodeSet from cognee.modules.engine.models.node_set import NodeSet
from cognee.complex_demos.crewai_demo.src.crewai_demo.github_dev_profile import GitHubDevProfile
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
from cognee.complex_demos.crewai_demo.src.crewai_demo.github_ingest import ( from cognee.complex_demos.crewai_demo.src.crewai_demo.github_ingest import (
get_github_data_for_cognee, get_github_data_for_cognee,
@@ -22,9 +22,6 @@ from cognee.complex_demos.crewai_demo.src.crewai_demo.github_datapoints import (
GitHubUser, GitHubUser,
Repository, Repository,
File, File,
FileChange,
Comment,
Issue,
Commit, Commit,
) )
@@ -37,7 +34,6 @@ from cognee.complex_demos.crewai_demo.src.crewai_demo.github_datapoint_creators
create_file_change_datapoint, create_file_change_datapoint,
create_issue_datapoint, create_issue_datapoint,
create_comment_datapoint, create_comment_datapoint,
create_github_datapoints,
) )
logger = get_logger("github_ingest") logger = get_logger("github_ingest")
@@ -74,7 +70,6 @@ def get_or_create_file(
filename: str, filename: str,
repo_name: str, repo_name: str,
files: Dict[str, File], files: Dict[str, File],
repository: Repository,
technical_nodeset: NodeSet, technical_nodeset: NodeSet,
) -> File: ) -> File:
file_key = f"{repo_name}:{filename}" file_key = f"{repo_name}:{filename}"
@@ -134,7 +129,7 @@ def process_file_changes_data(
if not repo_name or not filename or not commit_sha: if not repo_name or not filename or not commit_sha:
continue continue
repository = get_or_create_repository(repo_name, repositories, user, [technical_nodeset]) repository = get_or_create_repository(repo_name, repositories, user, [technical_nodeset])
file = get_or_create_file(filename, repo_name, files, repository, technical_nodeset) file = get_or_create_file(filename, repo_name, files, technical_nodeset)
commit = get_or_create_commit(fc_data, user, commits, repository, technical_nodeset) commit = get_or_create_commit(fc_data, user, commits, repository, technical_nodeset)
file_change = create_file_change_datapoint(fc_data, user, file, [technical_nodeset]) file_change = create_file_change_datapoint(fc_data, user, file, [technical_nodeset])
file_changes_list.append(file_change) file_changes_list.append(file_change)
@@ -246,6 +241,7 @@ async def cognify_github_data_from_username(
skip_no_diff: bool = True, skip_no_diff: bool = True,
): ):
"""Fetches GitHub data for a username and processes it through the DataPoint pipeline.""" """Fetches GitHub data for a username and processes it through the DataPoint pipeline."""
logger.info(f"Fetching GitHub data for user: {username}") logger.info(f"Fetching GitHub data for user: {username}")
github_data = get_github_data_for_cognee( github_data = get_github_data_for_cognee(
@@ -265,7 +261,9 @@ async def cognify_github_data_from_username(
github_data = json.loads(json.dumps(github_data, default=str)) github_data = json.loads(json.dumps(github_data, default=str))
return await cognify_github_data(github_data) await cognify_github_data(github_data)
return None
async def process_github_from_file(json_file_path: str): async def process_github_from_file(json_file_path: str):
@@ -295,5 +293,6 @@ if __name__ == "__main__":
# asyncio.run(process_github_from_file(json_file_path)) # asyncio.run(process_github_from_file(json_file_path))
# #
# Option 2: Process directly from GitHub # Option 2: Process directly from GitHub
username = "" username = ""
asyncio.run(cognify_github_data_from_username(username, token)) asyncio.run(cognify_github_data_from_username(username, token))

View file

@@ -2,6 +2,8 @@
import os import os
import warnings import warnings
import cognee import cognee
from cognee.modules.engine.models import NodeSet
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from hiring_crew import HiringCrew from hiring_crew import HiringCrew
# from crewai_demo.cognify_crew import CognifyCrew # from crewai_demo.cognify_crew import CognifyCrew

View file

@@ -250,14 +250,9 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
if len(vector_list) == 0: if len(vector_list) == 0:
return [] return []
# Normalize vector distance and add this as score information to vector_list
normalized_values = normalize_distances(vector_list)
for i in range(0, len(normalized_values)):
vector_list[i]["score"] = normalized_values[i]
# Create and return ScoredResult objects # Create and return ScoredResult objects
return [ return [
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("score")) ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("_distance"))
for row in vector_list for row in vector_list
] ]

View file

@@ -5,4 +5,3 @@ class NodeSet(DataPoint):
"""NodeSet data point.""" """NodeSet data point."""
name: str name: str
metadata: dict = {"index_fields": ["name"]}