First working raw crewai demo prototype
This commit is contained in:
parent
f825732eb2
commit
ce14a441af
9 changed files with 42 additions and 47 deletions
|
|
@ -4,9 +4,9 @@ soft_skills_assessment_task:
|
||||||
Evaluate the comments based on the communications clarity, community engagement, and general kindness.
|
Evaluate the comments based on the communications clarity, community engagement, and general kindness.
|
||||||
Please make the output in a way that it expresses the soft skills of the person.
|
Please make the output in a way that it expresses the soft skills of the person.
|
||||||
The people to evaluate are:
|
The people to evaluate are:
|
||||||
-Dean P.
|
-lxobr
|
||||||
-Thomas M.
|
-hajdul88
|
||||||
You can search them by asking comments by 'person', however you are allowed
|
You can search them by asking comments authored by the 'person', however you are allowed
|
||||||
and encouraged to ask multiple questions.
|
and encouraged to ask multiple questions.
|
||||||
expected_output: >
|
expected_output: >
|
||||||
results strictly containing:
|
results strictly containing:
|
||||||
|
|
@ -24,9 +24,9 @@ technical_assessment_task:
|
||||||
along with narrative comments detailing your findings. Use strictly the technical input
|
along with narrative comments detailing your findings. Use strictly the technical input
|
||||||
when you are scoring the candidate, you are not allowed to make any comment about soft skills.
|
when you are scoring the candidate, you are not allowed to make any comment about soft skills.
|
||||||
The people to evaluate are:
|
The people to evaluate are:
|
||||||
-Dean P.
|
-lxobr
|
||||||
-Thomas M.
|
-hajdul88
|
||||||
You can search them by code written by 'person', however you are allowed
|
You can search them by code changed by the 'person', however you are allowed
|
||||||
and encouraged to ask multiple questions.
|
and encouraged to ask multiple questions.
|
||||||
expected_output: >
|
expected_output: >
|
||||||
results strictly containing:
|
results strictly containing:
|
||||||
|
|
@ -42,8 +42,8 @@ hiring_decision_task:
|
||||||
Review the technical_assessment_task and soft_skills_assessment_task outputs,
|
Review the technical_assessment_task and soft_skills_assessment_task outputs,
|
||||||
then decide HIRE or NO_HIRE for each candidate with a detailed reasoning.
|
then decide HIRE or NO_HIRE for each candidate with a detailed reasoning.
|
||||||
The people to evaluate are:
|
The people to evaluate are:
|
||||||
-Dean P.
|
-lxobr
|
||||||
-Thomas M.
|
-hajdul88
|
||||||
We have to hire one of them.
|
We have to hire one of them.
|
||||||
expected_output: >
|
expected_output: >
|
||||||
A string strictly containing the following for each person:
|
A string strictly containing the following for each person:
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ from typing import Type, List, Optional
|
||||||
from pydantic import BaseModel, Field, PrivateAttr
|
from pydantic import BaseModel, Field, PrivateAttr
|
||||||
from cognee.api.v1.search import SearchType
|
from cognee.api.v1.search import SearchType
|
||||||
from cognee.modules.engine.models import NodeSet
|
from cognee.modules.engine.models import NodeSet
|
||||||
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||||
from cognee.modules.search.methods import search
|
from cognee.modules.search.methods import search
|
||||||
from cognee.modules.users.methods import get_default_user
|
from cognee.modules.users.methods import get_default_user
|
||||||
|
|
||||||
|
|
@ -28,12 +29,13 @@ class CogneeSearch(BaseTool):
|
||||||
async def main():
|
async def main():
|
||||||
try:
|
try:
|
||||||
print(kwargs.get("query"))
|
print(kwargs.get("query"))
|
||||||
search_results = await cognee.search(
|
|
||||||
query_type=SearchType.GRAPH_COMPLETION,
|
search_results = await GraphCompletionRetriever(
|
||||||
query_text=kwargs.get("query"),
|
top_k=10,
|
||||||
node_type=NodeSet,
|
node_type=NodeSet,
|
||||||
node_name=self._nodeset_name,
|
node_name=self._nodeset_name,
|
||||||
)
|
).get_context(query=kwargs.get("query"))
|
||||||
|
|
||||||
return search_results
|
return search_results
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error: {str(e)}"
|
return f"Error: {str(e)}"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,7 @@
|
||||||
from crewai.tools import BaseTool
|
from crewai.tools import BaseTool
|
||||||
|
|
||||||
|
from cognee.modules.engine.models import NodeSet
|
||||||
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||||
from ..github_ingest_datapoints import cognify_github_data_from_username
|
from ..github_ingest_datapoints import cognify_github_data_from_username
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -21,6 +24,7 @@ class GithubIngestion(BaseTool):
|
||||||
|
|
||||||
await cognify_github_data_from_username(applicant_1, token)
|
await cognify_github_data_from_username(applicant_1, token)
|
||||||
await cognify_github_data_from_username(applicant_2, token)
|
await cognify_github_data_from_username(applicant_2, token)
|
||||||
|
|
||||||
return "Github ingestion finished"
|
return "Github ingestion finished"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error: {str(e)}"
|
return f"Error: {str(e)}"
|
||||||
|
|
|
||||||
|
|
@ -26,8 +26,7 @@ def create_github_user_datapoint(user_data, nodesets: List[NodeSet]):
|
||||||
|
|
||||||
user = GitHubUser(
|
user = GitHubUser(
|
||||||
id=user_id,
|
id=user_id,
|
||||||
login=user_data.get("login", ""),
|
name=user_data.get("login", ""),
|
||||||
name=user_data.get("name"),
|
|
||||||
bio=user_data.get("bio"),
|
bio=user_data.get("bio"),
|
||||||
company=user_data.get("company"),
|
company=user_data.get("company"),
|
||||||
location=user_data.get("location"),
|
location=user_data.get("location"),
|
||||||
|
|
@ -74,7 +73,7 @@ def create_commit_datapoint(
|
||||||
commit = Commit(
|
commit = Commit(
|
||||||
id=commit_id,
|
id=commit_id,
|
||||||
commit_sha=commit_data.get("commit_sha", ""),
|
commit_sha=commit_data.get("commit_sha", ""),
|
||||||
commit_message=commit_data.get("commit_message", ""),
|
text="Commit message:" + (str)(commit_data.get("commit_message", "")),
|
||||||
commit_date=commit_data.get("commit_date", ""),
|
commit_date=commit_data.get("commit_date", ""),
|
||||||
commit_url=commit_data.get("commit_url", ""),
|
commit_url=commit_data.get("commit_url", ""),
|
||||||
author_name=commit_data.get("login", ""),
|
author_name=commit_data.get("login", ""),
|
||||||
|
|
@ -102,7 +101,7 @@ def create_file_change_datapoint(
|
||||||
additions=fc_data.get("additions", 0),
|
additions=fc_data.get("additions", 0),
|
||||||
deletions=fc_data.get("deletions", 0),
|
deletions=fc_data.get("deletions", 0),
|
||||||
changes=fc_data.get("changes", 0),
|
changes=fc_data.get("changes", 0),
|
||||||
diff=fc_data.get("diff", ""),
|
text=fc_data.get("diff", ""),
|
||||||
commit_sha=fc_data.get("commit_sha", ""),
|
commit_sha=fc_data.get("commit_sha", ""),
|
||||||
repo=fc_data.get("repo", ""),
|
repo=fc_data.get("repo", ""),
|
||||||
modifies=file.filename,
|
modifies=file.filename,
|
||||||
|
|
@ -123,7 +122,7 @@ def create_issue_datapoint(
|
||||||
issue = Issue(
|
issue = Issue(
|
||||||
id=issue_id,
|
id=issue_id,
|
||||||
number=issue_data.get("issue_number", 0),
|
number=issue_data.get("issue_number", 0),
|
||||||
title=issue_data.get("issue_title", ""),
|
text=issue_data.get("issue_title", ""),
|
||||||
state=issue_data.get("issue_state", ""),
|
state=issue_data.get("issue_state", ""),
|
||||||
repository=repo_name,
|
repository=repo_name,
|
||||||
is_pr=False,
|
is_pr=False,
|
||||||
|
|
@ -144,7 +143,7 @@ def create_comment_datapoint(
|
||||||
comment = Comment(
|
comment = Comment(
|
||||||
id=comment_id,
|
id=comment_id,
|
||||||
comment_id=str(comment_data.get("comment_id", "")),
|
comment_id=str(comment_data.get("comment_id", "")),
|
||||||
body=comment_data.get("body", ""),
|
text=comment_data.get("body", ""),
|
||||||
created_at=comment_data.get("created_at", ""),
|
created_at=comment_data.get("created_at", ""),
|
||||||
updated_at=comment_data.get("updated_at", ""),
|
updated_at=comment_data.get("updated_at", ""),
|
||||||
author_name=comment_data.get("login", ""),
|
author_name=comment_data.get("login", ""),
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
from uuid import uuid5, NAMESPACE_OID
|
from uuid import uuid5, NAMESPACE_OID
|
||||||
from typing import Optional, List
|
from typing import Optional, List
|
||||||
from cognee.low_level import DataPoint
|
from cognee.infrastructure.engine import DataPoint
|
||||||
from cognee.modules.engine.models.node_set import NodeSet
|
|
||||||
|
|
||||||
|
|
||||||
class File(DataPoint):
|
class File(DataPoint):
|
||||||
|
|
@ -13,7 +12,6 @@ class File(DataPoint):
|
||||||
|
|
||||||
|
|
||||||
class GitHubUser(DataPoint):
|
class GitHubUser(DataPoint):
|
||||||
login: str
|
|
||||||
name: Optional[str]
|
name: Optional[str]
|
||||||
bio: Optional[str]
|
bio: Optional[str]
|
||||||
company: Optional[str]
|
company: Optional[str]
|
||||||
|
|
@ -22,7 +20,7 @@ class GitHubUser(DataPoint):
|
||||||
followers: int
|
followers: int
|
||||||
following: int
|
following: int
|
||||||
interacts_with: List["Repository"] = []
|
interacts_with: List["Repository"] = []
|
||||||
metadata: dict = {"index_fields": ["login"]}
|
metadata: dict = {"index_fields": ["name"]}
|
||||||
|
|
||||||
|
|
||||||
class FileChange(DataPoint):
|
class FileChange(DataPoint):
|
||||||
|
|
@ -31,49 +29,46 @@ class FileChange(DataPoint):
|
||||||
additions: int
|
additions: int
|
||||||
deletions: int
|
deletions: int
|
||||||
changes: int
|
changes: int
|
||||||
diff: str
|
text: str
|
||||||
commit_sha: str
|
commit_sha: str
|
||||||
repo: str
|
repo: str
|
||||||
modifies: str
|
modifies: str
|
||||||
changed_by: GitHubUser
|
changed_by: GitHubUser
|
||||||
metadata: dict = {"index_fields": ["diff"]}
|
metadata: dict = {"index_fields": ["text"]}
|
||||||
|
|
||||||
|
|
||||||
class Comment(DataPoint):
|
class Comment(DataPoint):
|
||||||
comment_id: str
|
comment_id: str
|
||||||
body: str
|
text: str
|
||||||
created_at: str
|
created_at: str
|
||||||
updated_at: str
|
updated_at: str
|
||||||
author_name: str
|
author_name: str
|
||||||
issue_number: int
|
issue_number: int
|
||||||
repo: str
|
repo: str
|
||||||
authored_by: GitHubUser
|
authored_by: GitHubUser
|
||||||
metadata: dict = {"index_fields": ["body"]}
|
metadata: dict = {"index_fields": ["text"]}
|
||||||
|
|
||||||
|
|
||||||
class Issue(DataPoint):
|
class Issue(DataPoint):
|
||||||
number: int
|
number: int
|
||||||
title: str
|
text: str
|
||||||
state: str
|
state: str
|
||||||
repository: str
|
repository: str
|
||||||
is_pr: bool
|
is_pr: bool
|
||||||
has_comment: List[Comment] = []
|
has_comment: List[Comment] = []
|
||||||
metadata: dict = {"index_fields": ["title"]}
|
|
||||||
|
|
||||||
|
|
||||||
class Commit(DataPoint):
|
class Commit(DataPoint):
|
||||||
commit_sha: str
|
commit_sha: str
|
||||||
commit_message: str
|
text: str
|
||||||
commit_date: str
|
commit_date: str
|
||||||
commit_url: str
|
commit_url: str
|
||||||
author_name: str
|
author_name: str
|
||||||
repo: str
|
repo: str
|
||||||
has_change: List[FileChange] = []
|
has_change: List[FileChange] = []
|
||||||
metadata: dict = {"index_fields": ["commit_message"]}
|
|
||||||
|
|
||||||
|
|
||||||
class Repository(DataPoint):
|
class Repository(DataPoint):
|
||||||
name: str
|
name: str
|
||||||
has_issue: List[Issue] = []
|
has_issue: List[Issue] = []
|
||||||
has_commit: List[Commit] = []
|
has_commit: List[Commit] = []
|
||||||
metadata: dict = {"index_fields": ["name"]}
|
|
||||||
|
|
|
||||||
|
|
@ -3,15 +3,15 @@ import asyncio
|
||||||
from uuid import uuid5, NAMESPACE_OID
|
from uuid import uuid5, NAMESPACE_OID
|
||||||
from typing import Optional, List, Dict, Any
|
from typing import Optional, List, Dict, Any
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from cognee.api.v1.search import SearchType
|
||||||
import cognee
|
import cognee
|
||||||
from cognee.low_level import DataPoint, setup as cognee_setup
|
from cognee.low_level import DataPoint, setup as cognee_setup
|
||||||
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||||
from cognee.tasks.storage import add_data_points
|
from cognee.tasks.storage import add_data_points
|
||||||
from cognee.modules.pipelines.tasks.task import Task
|
from cognee.modules.pipelines.tasks.task import Task
|
||||||
from cognee.modules.pipelines import run_tasks
|
from cognee.modules.pipelines import run_tasks
|
||||||
from cognee.modules.users.methods import get_default_user
|
from cognee.modules.users.methods import get_default_user
|
||||||
from cognee.modules.engine.models.node_set import NodeSet
|
from cognee.modules.engine.models.node_set import NodeSet
|
||||||
from cognee.complex_demos.crewai_demo.src.crewai_demo.github_dev_profile import GitHubDevProfile
|
|
||||||
from cognee.shared.logging_utils import get_logger
|
from cognee.shared.logging_utils import get_logger
|
||||||
from cognee.complex_demos.crewai_demo.src.crewai_demo.github_ingest import (
|
from cognee.complex_demos.crewai_demo.src.crewai_demo.github_ingest import (
|
||||||
get_github_data_for_cognee,
|
get_github_data_for_cognee,
|
||||||
|
|
@ -22,9 +22,6 @@ from cognee.complex_demos.crewai_demo.src.crewai_demo.github_datapoints import (
|
||||||
GitHubUser,
|
GitHubUser,
|
||||||
Repository,
|
Repository,
|
||||||
File,
|
File,
|
||||||
FileChange,
|
|
||||||
Comment,
|
|
||||||
Issue,
|
|
||||||
Commit,
|
Commit,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -37,7 +34,6 @@ from cognee.complex_demos.crewai_demo.src.crewai_demo.github_datapoint_creators
|
||||||
create_file_change_datapoint,
|
create_file_change_datapoint,
|
||||||
create_issue_datapoint,
|
create_issue_datapoint,
|
||||||
create_comment_datapoint,
|
create_comment_datapoint,
|
||||||
create_github_datapoints,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = get_logger("github_ingest")
|
logger = get_logger("github_ingest")
|
||||||
|
|
@ -74,7 +70,6 @@ def get_or_create_file(
|
||||||
filename: str,
|
filename: str,
|
||||||
repo_name: str,
|
repo_name: str,
|
||||||
files: Dict[str, File],
|
files: Dict[str, File],
|
||||||
repository: Repository,
|
|
||||||
technical_nodeset: NodeSet,
|
technical_nodeset: NodeSet,
|
||||||
) -> File:
|
) -> File:
|
||||||
file_key = f"{repo_name}:{filename}"
|
file_key = f"{repo_name}:{filename}"
|
||||||
|
|
@ -134,7 +129,7 @@ def process_file_changes_data(
|
||||||
if not repo_name or not filename or not commit_sha:
|
if not repo_name or not filename or not commit_sha:
|
||||||
continue
|
continue
|
||||||
repository = get_or_create_repository(repo_name, repositories, user, [technical_nodeset])
|
repository = get_or_create_repository(repo_name, repositories, user, [technical_nodeset])
|
||||||
file = get_or_create_file(filename, repo_name, files, repository, technical_nodeset)
|
file = get_or_create_file(filename, repo_name, files, technical_nodeset)
|
||||||
commit = get_or_create_commit(fc_data, user, commits, repository, technical_nodeset)
|
commit = get_or_create_commit(fc_data, user, commits, repository, technical_nodeset)
|
||||||
file_change = create_file_change_datapoint(fc_data, user, file, [technical_nodeset])
|
file_change = create_file_change_datapoint(fc_data, user, file, [technical_nodeset])
|
||||||
file_changes_list.append(file_change)
|
file_changes_list.append(file_change)
|
||||||
|
|
@ -246,6 +241,7 @@ async def cognify_github_data_from_username(
|
||||||
skip_no_diff: bool = True,
|
skip_no_diff: bool = True,
|
||||||
):
|
):
|
||||||
"""Fetches GitHub data for a username and processes it through the DataPoint pipeline."""
|
"""Fetches GitHub data for a username and processes it through the DataPoint pipeline."""
|
||||||
|
|
||||||
logger.info(f"Fetching GitHub data for user: {username}")
|
logger.info(f"Fetching GitHub data for user: {username}")
|
||||||
|
|
||||||
github_data = get_github_data_for_cognee(
|
github_data = get_github_data_for_cognee(
|
||||||
|
|
@ -265,7 +261,9 @@ async def cognify_github_data_from_username(
|
||||||
|
|
||||||
github_data = json.loads(json.dumps(github_data, default=str))
|
github_data = json.loads(json.dumps(github_data, default=str))
|
||||||
|
|
||||||
return await cognify_github_data(github_data)
|
await cognify_github_data(github_data)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
async def process_github_from_file(json_file_path: str):
|
async def process_github_from_file(json_file_path: str):
|
||||||
|
|
@ -295,5 +293,6 @@ if __name__ == "__main__":
|
||||||
# asyncio.run(process_github_from_file(json_file_path))
|
# asyncio.run(process_github_from_file(json_file_path))
|
||||||
#
|
#
|
||||||
# Option 2: Process directly from GitHub
|
# Option 2: Process directly from GitHub
|
||||||
|
|
||||||
username = ""
|
username = ""
|
||||||
asyncio.run(cognify_github_data_from_username(username, token))
|
asyncio.run(cognify_github_data_from_username(username, token))
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
import os
|
import os
|
||||||
import warnings
|
import warnings
|
||||||
import cognee
|
import cognee
|
||||||
|
from cognee.modules.engine.models import NodeSet
|
||||||
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||||
from hiring_crew import HiringCrew
|
from hiring_crew import HiringCrew
|
||||||
|
|
||||||
# from crewai_demo.cognify_crew import CognifyCrew
|
# from crewai_demo.cognify_crew import CognifyCrew
|
||||||
|
|
|
||||||
|
|
@ -250,14 +250,9 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
|
||||||
if len(vector_list) == 0:
|
if len(vector_list) == 0:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Normalize vector distance and add this as score information to vector_list
|
|
||||||
normalized_values = normalize_distances(vector_list)
|
|
||||||
for i in range(0, len(normalized_values)):
|
|
||||||
vector_list[i]["score"] = normalized_values[i]
|
|
||||||
|
|
||||||
# Create and return ScoredResult objects
|
# Create and return ScoredResult objects
|
||||||
return [
|
return [
|
||||||
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("score"))
|
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("_distance"))
|
||||||
for row in vector_list
|
for row in vector_list
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,4 +5,3 @@ class NodeSet(DataPoint):
|
||||||
"""NodeSet data point."""
|
"""NodeSet data point."""
|
||||||
|
|
||||||
name: str
|
name: str
|
||||||
metadata: dict = {"index_fields": ["name"]}
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue