First working raw crewai demo prototype

This commit is contained in:
hajdul88 2025-05-15 14:41:46 +02:00
parent f825732eb2
commit ce14a441af
9 changed files with 42 additions and 47 deletions

View file

@ -4,9 +4,9 @@ soft_skills_assessment_task:
Evaluate the comments based on communication clarity, community engagement, and general kindness.
Please phrase the output so that it expresses the soft skills of the person.
The people to evaluate are:
-Dean P.
-Thomas M.
You can search them by asking comments by 'person', however you are allowed
-lxobr
-hajdul88
You can search for them by asking for comments authored by the 'person'; however, you are allowed
and encouraged to ask multiple questions.
expected_output: >
results strictly containing:
@ -24,9 +24,9 @@ technical_assessment_task:
along with narrative comments detailing your findings. Use strictly the technical input
when you are scoring the candidate; you are not allowed to make any comments about soft skills.
The people to evaluate are:
-Dean P.
-Thomas M.
You can search them by code written by 'person', however you are allowed
-lxobr
-hajdul88
You can search for them by asking about code changed by the 'person'; however, you are allowed
and encouraged to ask multiple questions.
expected_output: >
results strictly containing:
@ -42,8 +42,8 @@ hiring_decision_task:
Review the technical_assessment_task and soft_skills_assessment_task outputs,
then decide HIRE or NO_HIRE for each candidate with a detailed reasoning.
The people to evaluate are:
-Dean P.
-Thomas M.
-lxobr
-hajdul88
We have to hire one of them.
expected_output: >
A string strictly containing the following for each person:

View file

@ -3,6 +3,7 @@ from typing import Type, List, Optional
from pydantic import BaseModel, Field, PrivateAttr
from cognee.api.v1.search import SearchType
from cognee.modules.engine.models import NodeSet
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from cognee.modules.search.methods import search
from cognee.modules.users.methods import get_default_user
@ -28,12 +29,13 @@ class CogneeSearch(BaseTool):
async def main():
try:
print(kwargs.get("query"))
search_results = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION,
query_text=kwargs.get("query"),
search_results = await GraphCompletionRetriever(
top_k=10,
node_type=NodeSet,
node_name=self._nodeset_name,
)
).get_context(query=kwargs.get("query"))
return search_results
except Exception as e:
return f"Error: {str(e)}"

View file

@ -1,4 +1,7 @@
from crewai.tools import BaseTool
from cognee.modules.engine.models import NodeSet
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from ..github_ingest_datapoints import cognify_github_data_from_username
@ -21,6 +24,7 @@ class GithubIngestion(BaseTool):
await cognify_github_data_from_username(applicant_1, token)
await cognify_github_data_from_username(applicant_2, token)
return "Github ingestion finished"
except Exception as e:
return f"Error: {str(e)}"

View file

@ -26,8 +26,7 @@ def create_github_user_datapoint(user_data, nodesets: List[NodeSet]):
user = GitHubUser(
id=user_id,
login=user_data.get("login", ""),
name=user_data.get("name"),
name=user_data.get("login", ""),
bio=user_data.get("bio"),
company=user_data.get("company"),
location=user_data.get("location"),
@ -74,7 +73,7 @@ def create_commit_datapoint(
commit = Commit(
id=commit_id,
commit_sha=commit_data.get("commit_sha", ""),
commit_message=commit_data.get("commit_message", ""),
text="Commit message:" + (str)(commit_data.get("commit_message", "")),
commit_date=commit_data.get("commit_date", ""),
commit_url=commit_data.get("commit_url", ""),
author_name=commit_data.get("login", ""),
@ -102,7 +101,7 @@ def create_file_change_datapoint(
additions=fc_data.get("additions", 0),
deletions=fc_data.get("deletions", 0),
changes=fc_data.get("changes", 0),
diff=fc_data.get("diff", ""),
text=fc_data.get("diff", ""),
commit_sha=fc_data.get("commit_sha", ""),
repo=fc_data.get("repo", ""),
modifies=file.filename,
@ -123,7 +122,7 @@ def create_issue_datapoint(
issue = Issue(
id=issue_id,
number=issue_data.get("issue_number", 0),
title=issue_data.get("issue_title", ""),
text=issue_data.get("issue_title", ""),
state=issue_data.get("issue_state", ""),
repository=repo_name,
is_pr=False,
@ -144,7 +143,7 @@ def create_comment_datapoint(
comment = Comment(
id=comment_id,
comment_id=str(comment_data.get("comment_id", "")),
body=comment_data.get("body", ""),
text=comment_data.get("body", ""),
created_at=comment_data.get("created_at", ""),
updated_at=comment_data.get("updated_at", ""),
author_name=comment_data.get("login", ""),

View file

@ -1,7 +1,6 @@
from uuid import uuid5, NAMESPACE_OID
from typing import Optional, List
from cognee.low_level import DataPoint
from cognee.modules.engine.models.node_set import NodeSet
from cognee.infrastructure.engine import DataPoint
class File(DataPoint):
@ -13,7 +12,6 @@ class File(DataPoint):
class GitHubUser(DataPoint):
login: str
name: Optional[str]
bio: Optional[str]
company: Optional[str]
@ -22,7 +20,7 @@ class GitHubUser(DataPoint):
followers: int
following: int
interacts_with: List["Repository"] = []
metadata: dict = {"index_fields": ["login"]}
metadata: dict = {"index_fields": ["name"]}
class FileChange(DataPoint):
@ -31,49 +29,46 @@ class FileChange(DataPoint):
additions: int
deletions: int
changes: int
diff: str
text: str
commit_sha: str
repo: str
modifies: str
changed_by: GitHubUser
metadata: dict = {"index_fields": ["diff"]}
metadata: dict = {"index_fields": ["text"]}
class Comment(DataPoint):
comment_id: str
body: str
text: str
created_at: str
updated_at: str
author_name: str
issue_number: int
repo: str
authored_by: GitHubUser
metadata: dict = {"index_fields": ["body"]}
metadata: dict = {"index_fields": ["text"]}
class Issue(DataPoint):
number: int
title: str
text: str
state: str
repository: str
is_pr: bool
has_comment: List[Comment] = []
metadata: dict = {"index_fields": ["title"]}
class Commit(DataPoint):
commit_sha: str
commit_message: str
text: str
commit_date: str
commit_url: str
author_name: str
repo: str
has_change: List[FileChange] = []
metadata: dict = {"index_fields": ["commit_message"]}
class Repository(DataPoint):
name: str
has_issue: List[Issue] = []
has_commit: List[Commit] = []
metadata: dict = {"index_fields": ["name"]}

View file

@ -3,15 +3,15 @@ import asyncio
from uuid import uuid5, NAMESPACE_OID
from typing import Optional, List, Dict, Any
from pathlib import Path
from cognee.api.v1.search import SearchType
import cognee
from cognee.low_level import DataPoint, setup as cognee_setup
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from cognee.tasks.storage import add_data_points
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.pipelines import run_tasks
from cognee.modules.users.methods import get_default_user
from cognee.modules.engine.models.node_set import NodeSet
from cognee.complex_demos.crewai_demo.src.crewai_demo.github_dev_profile import GitHubDevProfile
from cognee.shared.logging_utils import get_logger
from cognee.complex_demos.crewai_demo.src.crewai_demo.github_ingest import (
get_github_data_for_cognee,
@ -22,9 +22,6 @@ from cognee.complex_demos.crewai_demo.src.crewai_demo.github_datapoints import (
GitHubUser,
Repository,
File,
FileChange,
Comment,
Issue,
Commit,
)
@ -37,7 +34,6 @@ from cognee.complex_demos.crewai_demo.src.crewai_demo.github_datapoint_creators
create_file_change_datapoint,
create_issue_datapoint,
create_comment_datapoint,
create_github_datapoints,
)
logger = get_logger("github_ingest")
@ -74,7 +70,6 @@ def get_or_create_file(
filename: str,
repo_name: str,
files: Dict[str, File],
repository: Repository,
technical_nodeset: NodeSet,
) -> File:
file_key = f"{repo_name}:{filename}"
@ -134,7 +129,7 @@ def process_file_changes_data(
if not repo_name or not filename or not commit_sha:
continue
repository = get_or_create_repository(repo_name, repositories, user, [technical_nodeset])
file = get_or_create_file(filename, repo_name, files, repository, technical_nodeset)
file = get_or_create_file(filename, repo_name, files, technical_nodeset)
commit = get_or_create_commit(fc_data, user, commits, repository, technical_nodeset)
file_change = create_file_change_datapoint(fc_data, user, file, [technical_nodeset])
file_changes_list.append(file_change)
@ -246,6 +241,7 @@ async def cognify_github_data_from_username(
skip_no_diff: bool = True,
):
"""Fetches GitHub data for a username and processes it through the DataPoint pipeline."""
logger.info(f"Fetching GitHub data for user: {username}")
github_data = get_github_data_for_cognee(
@ -265,7 +261,9 @@ async def cognify_github_data_from_username(
github_data = json.loads(json.dumps(github_data, default=str))
return await cognify_github_data(github_data)
await cognify_github_data(github_data)
return None
async def process_github_from_file(json_file_path: str):
@ -295,5 +293,6 @@ if __name__ == "__main__":
# asyncio.run(process_github_from_file(json_file_path))
#
# Option 2: Process directly from GitHub
username = ""
asyncio.run(cognify_github_data_from_username(username, token))

View file

@ -2,6 +2,8 @@
import os
import warnings
import cognee
from cognee.modules.engine.models import NodeSet
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from hiring_crew import HiringCrew
# from crewai_demo.cognify_crew import CognifyCrew

View file

@ -250,14 +250,9 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
if len(vector_list) == 0:
return []
# Normalize vector distance and add this as score information to vector_list
normalized_values = normalize_distances(vector_list)
for i in range(0, len(normalized_values)):
vector_list[i]["score"] = normalized_values[i]
# Create and return ScoredResult objects
return [
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("score"))
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("_distance"))
for row in vector_list
]

View file

@ -5,4 +5,3 @@ class NodeSet(DataPoint):
"""NodeSet data point."""
name: str
metadata: dict = {"index_fields": ["name"]}