First working raw crewai demo prototype
This commit is contained in:
parent
f825732eb2
commit
ce14a441af
9 changed files with 42 additions and 47 deletions
|
|
@ -4,9 +4,9 @@ soft_skills_assessment_task:
|
|||
Evaluate the comments based on the communications clarity, community engagement, and general kindness.
|
||||
Please make the output in a way that it expresses the soft skills of the person.
|
||||
The people to evaluate are:
|
||||
-Dean P.
|
||||
-Thomas M.
|
||||
You can search them by asking comments by 'person', however you are allowed
|
||||
-lxobr
|
||||
-hajdul88
|
||||
You can search them by asking comments authored by the 'person', however you are allowed
|
||||
and encouraged to ask multiple questions.
|
||||
expected_output: >
|
||||
results strictly containing:
|
||||
|
|
@ -24,9 +24,9 @@ technical_assessment_task:
|
|||
along with narrative comments detailing your findings. Use strictly the technical input
|
||||
when you are scoring the candidate, you are not allowed to make any comment about soft skills.
|
||||
The people to evaluate are:
|
||||
-Dean P.
|
||||
-Thomas M.
|
||||
You can search them by code written by 'person', however you are allowed
|
||||
-lxobr
|
||||
-hajdul88
|
||||
You can search them by code changed by the 'person', however you are allowed
|
||||
and encouraged to ask multiple questions.
|
||||
expected_output: >
|
||||
results strictly containing:
|
||||
|
|
@ -42,8 +42,8 @@ hiring_decision_task:
|
|||
Review the technical_assessment_task and soft_skills_assessment_task outputs,
|
||||
then decide HIRE or NO_HIRE for each candidate with a detailed reasoning.
|
||||
The people to evaluate are:
|
||||
-Dean P.
|
||||
-Thomas M.
|
||||
-lxobr
|
||||
-hajdul88
|
||||
We have to hire one of them.
|
||||
expected_output: >
|
||||
A string strictly containing the following for each person:
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from typing import Type, List, Optional
|
|||
from pydantic import BaseModel, Field, PrivateAttr
|
||||
from cognee.api.v1.search import SearchType
|
||||
from cognee.modules.engine.models import NodeSet
|
||||
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||
from cognee.modules.search.methods import search
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
|
||||
|
|
@ -28,12 +29,13 @@ class CogneeSearch(BaseTool):
|
|||
async def main():
|
||||
try:
|
||||
print(kwargs.get("query"))
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text=kwargs.get("query"),
|
||||
|
||||
search_results = await GraphCompletionRetriever(
|
||||
top_k=10,
|
||||
node_type=NodeSet,
|
||||
node_name=self._nodeset_name,
|
||||
)
|
||||
).get_context(query=kwargs.get("query"))
|
||||
|
||||
return search_results
|
||||
except Exception as e:
|
||||
return f"Error: {str(e)}"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
from crewai.tools import BaseTool
|
||||
|
||||
from cognee.modules.engine.models import NodeSet
|
||||
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||
from ..github_ingest_datapoints import cognify_github_data_from_username
|
||||
|
||||
|
||||
|
|
@ -21,6 +24,7 @@ class GithubIngestion(BaseTool):
|
|||
|
||||
await cognify_github_data_from_username(applicant_1, token)
|
||||
await cognify_github_data_from_username(applicant_2, token)
|
||||
|
||||
return "Github ingestion finished"
|
||||
except Exception as e:
|
||||
return f"Error: {str(e)}"
|
||||
|
|
|
|||
|
|
@ -26,8 +26,7 @@ def create_github_user_datapoint(user_data, nodesets: List[NodeSet]):
|
|||
|
||||
user = GitHubUser(
|
||||
id=user_id,
|
||||
login=user_data.get("login", ""),
|
||||
name=user_data.get("name"),
|
||||
name=user_data.get("login", ""),
|
||||
bio=user_data.get("bio"),
|
||||
company=user_data.get("company"),
|
||||
location=user_data.get("location"),
|
||||
|
|
@ -74,7 +73,7 @@ def create_commit_datapoint(
|
|||
commit = Commit(
|
||||
id=commit_id,
|
||||
commit_sha=commit_data.get("commit_sha", ""),
|
||||
commit_message=commit_data.get("commit_message", ""),
|
||||
text="Commit message:" + (str)(commit_data.get("commit_message", "")),
|
||||
commit_date=commit_data.get("commit_date", ""),
|
||||
commit_url=commit_data.get("commit_url", ""),
|
||||
author_name=commit_data.get("login", ""),
|
||||
|
|
@ -102,7 +101,7 @@ def create_file_change_datapoint(
|
|||
additions=fc_data.get("additions", 0),
|
||||
deletions=fc_data.get("deletions", 0),
|
||||
changes=fc_data.get("changes", 0),
|
||||
diff=fc_data.get("diff", ""),
|
||||
text=fc_data.get("diff", ""),
|
||||
commit_sha=fc_data.get("commit_sha", ""),
|
||||
repo=fc_data.get("repo", ""),
|
||||
modifies=file.filename,
|
||||
|
|
@ -123,7 +122,7 @@ def create_issue_datapoint(
|
|||
issue = Issue(
|
||||
id=issue_id,
|
||||
number=issue_data.get("issue_number", 0),
|
||||
title=issue_data.get("issue_title", ""),
|
||||
text=issue_data.get("issue_title", ""),
|
||||
state=issue_data.get("issue_state", ""),
|
||||
repository=repo_name,
|
||||
is_pr=False,
|
||||
|
|
@ -144,7 +143,7 @@ def create_comment_datapoint(
|
|||
comment = Comment(
|
||||
id=comment_id,
|
||||
comment_id=str(comment_data.get("comment_id", "")),
|
||||
body=comment_data.get("body", ""),
|
||||
text=comment_data.get("body", ""),
|
||||
created_at=comment_data.get("created_at", ""),
|
||||
updated_at=comment_data.get("updated_at", ""),
|
||||
author_name=comment_data.get("login", ""),
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
from uuid import uuid5, NAMESPACE_OID
|
||||
from typing import Optional, List
|
||||
from cognee.low_level import DataPoint
|
||||
from cognee.modules.engine.models.node_set import NodeSet
|
||||
from cognee.infrastructure.engine import DataPoint
|
||||
|
||||
|
||||
class File(DataPoint):
|
||||
|
|
@ -13,7 +12,6 @@ class File(DataPoint):
|
|||
|
||||
|
||||
class GitHubUser(DataPoint):
|
||||
login: str
|
||||
name: Optional[str]
|
||||
bio: Optional[str]
|
||||
company: Optional[str]
|
||||
|
|
@ -22,7 +20,7 @@ class GitHubUser(DataPoint):
|
|||
followers: int
|
||||
following: int
|
||||
interacts_with: List["Repository"] = []
|
||||
metadata: dict = {"index_fields": ["login"]}
|
||||
metadata: dict = {"index_fields": ["name"]}
|
||||
|
||||
|
||||
class FileChange(DataPoint):
|
||||
|
|
@ -31,49 +29,46 @@ class FileChange(DataPoint):
|
|||
additions: int
|
||||
deletions: int
|
||||
changes: int
|
||||
diff: str
|
||||
text: str
|
||||
commit_sha: str
|
||||
repo: str
|
||||
modifies: str
|
||||
changed_by: GitHubUser
|
||||
metadata: dict = {"index_fields": ["diff"]}
|
||||
metadata: dict = {"index_fields": ["text"]}
|
||||
|
||||
|
||||
class Comment(DataPoint):
|
||||
comment_id: str
|
||||
body: str
|
||||
text: str
|
||||
created_at: str
|
||||
updated_at: str
|
||||
author_name: str
|
||||
issue_number: int
|
||||
repo: str
|
||||
authored_by: GitHubUser
|
||||
metadata: dict = {"index_fields": ["body"]}
|
||||
metadata: dict = {"index_fields": ["text"]}
|
||||
|
||||
|
||||
class Issue(DataPoint):
|
||||
number: int
|
||||
title: str
|
||||
text: str
|
||||
state: str
|
||||
repository: str
|
||||
is_pr: bool
|
||||
has_comment: List[Comment] = []
|
||||
metadata: dict = {"index_fields": ["title"]}
|
||||
|
||||
|
||||
class Commit(DataPoint):
|
||||
commit_sha: str
|
||||
commit_message: str
|
||||
text: str
|
||||
commit_date: str
|
||||
commit_url: str
|
||||
author_name: str
|
||||
repo: str
|
||||
has_change: List[FileChange] = []
|
||||
metadata: dict = {"index_fields": ["commit_message"]}
|
||||
|
||||
|
||||
class Repository(DataPoint):
|
||||
name: str
|
||||
has_issue: List[Issue] = []
|
||||
has_commit: List[Commit] = []
|
||||
metadata: dict = {"index_fields": ["name"]}
|
||||
|
|
|
|||
|
|
@ -3,15 +3,15 @@ import asyncio
|
|||
from uuid import uuid5, NAMESPACE_OID
|
||||
from typing import Optional, List, Dict, Any
|
||||
from pathlib import Path
|
||||
|
||||
from cognee.api.v1.search import SearchType
|
||||
import cognee
|
||||
from cognee.low_level import DataPoint, setup as cognee_setup
|
||||
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.modules.pipelines.tasks.task import Task
|
||||
from cognee.modules.pipelines import run_tasks
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
from cognee.modules.engine.models.node_set import NodeSet
|
||||
from cognee.complex_demos.crewai_demo.src.crewai_demo.github_dev_profile import GitHubDevProfile
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.complex_demos.crewai_demo.src.crewai_demo.github_ingest import (
|
||||
get_github_data_for_cognee,
|
||||
|
|
@ -22,9 +22,6 @@ from cognee.complex_demos.crewai_demo.src.crewai_demo.github_datapoints import (
|
|||
GitHubUser,
|
||||
Repository,
|
||||
File,
|
||||
FileChange,
|
||||
Comment,
|
||||
Issue,
|
||||
Commit,
|
||||
)
|
||||
|
||||
|
|
@ -37,7 +34,6 @@ from cognee.complex_demos.crewai_demo.src.crewai_demo.github_datapoint_creators
|
|||
create_file_change_datapoint,
|
||||
create_issue_datapoint,
|
||||
create_comment_datapoint,
|
||||
create_github_datapoints,
|
||||
)
|
||||
|
||||
logger = get_logger("github_ingest")
|
||||
|
|
@ -74,7 +70,6 @@ def get_or_create_file(
|
|||
filename: str,
|
||||
repo_name: str,
|
||||
files: Dict[str, File],
|
||||
repository: Repository,
|
||||
technical_nodeset: NodeSet,
|
||||
) -> File:
|
||||
file_key = f"{repo_name}:{filename}"
|
||||
|
|
@ -134,7 +129,7 @@ def process_file_changes_data(
|
|||
if not repo_name or not filename or not commit_sha:
|
||||
continue
|
||||
repository = get_or_create_repository(repo_name, repositories, user, [technical_nodeset])
|
||||
file = get_or_create_file(filename, repo_name, files, repository, technical_nodeset)
|
||||
file = get_or_create_file(filename, repo_name, files, technical_nodeset)
|
||||
commit = get_or_create_commit(fc_data, user, commits, repository, technical_nodeset)
|
||||
file_change = create_file_change_datapoint(fc_data, user, file, [technical_nodeset])
|
||||
file_changes_list.append(file_change)
|
||||
|
|
@ -246,6 +241,7 @@ async def cognify_github_data_from_username(
|
|||
skip_no_diff: bool = True,
|
||||
):
|
||||
"""Fetches GitHub data for a username and processes it through the DataPoint pipeline."""
|
||||
|
||||
logger.info(f"Fetching GitHub data for user: {username}")
|
||||
|
||||
github_data = get_github_data_for_cognee(
|
||||
|
|
@ -265,7 +261,9 @@ async def cognify_github_data_from_username(
|
|||
|
||||
github_data = json.loads(json.dumps(github_data, default=str))
|
||||
|
||||
return await cognify_github_data(github_data)
|
||||
await cognify_github_data(github_data)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
async def process_github_from_file(json_file_path: str):
|
||||
|
|
@ -295,5 +293,6 @@ if __name__ == "__main__":
|
|||
# asyncio.run(process_github_from_file(json_file_path))
|
||||
#
|
||||
# Option 2: Process directly from GitHub
|
||||
|
||||
username = ""
|
||||
asyncio.run(cognify_github_data_from_username(username, token))
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
import os
|
||||
import warnings
|
||||
import cognee
|
||||
from cognee.modules.engine.models import NodeSet
|
||||
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||
from hiring_crew import HiringCrew
|
||||
|
||||
# from crewai_demo.cognify_crew import CognifyCrew
|
||||
|
|
|
|||
|
|
@ -250,14 +250,9 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
|
|||
if len(vector_list) == 0:
|
||||
return []
|
||||
|
||||
# Normalize vector distance and add this as score information to vector_list
|
||||
normalized_values = normalize_distances(vector_list)
|
||||
for i in range(0, len(normalized_values)):
|
||||
vector_list[i]["score"] = normalized_values[i]
|
||||
|
||||
# Create and return ScoredResult objects
|
||||
return [
|
||||
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("score"))
|
||||
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("_distance"))
|
||||
for row in vector_list
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -5,4 +5,3 @@ class NodeSet(DataPoint):
|
|||
"""NodeSet data point."""
|
||||
|
||||
name: str
|
||||
metadata: dict = {"index_fields": ["name"]}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue