Merge branch 'dev' into COG-970-refactor-tokenizing
This commit is contained in:
commit
77a72851fc
12 changed files with 211 additions and 29 deletions
40
.github/workflows/profiling.yaml
vendored
40
.github/workflows/profiling.yaml
vendored
|
|
@ -68,32 +68,32 @@ jobs:
|
||||||
echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
|
echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
|
||||||
|
|
||||||
# Run profiler on the base branch
|
# Run profiler on the base branch
|
||||||
- name: Run profiler on base branch
|
# - name: Run profiler on base branch
|
||||||
env:
|
|
||||||
BASE_SHA: ${{ env.BASE_SHA }}
|
|
||||||
run: |
|
|
||||||
echo "Profiling the base branch for code_graph_pipeline.py"
|
|
||||||
echo "Checking out base SHA: $BASE_SHA"
|
|
||||||
git checkout $BASE_SHA
|
|
||||||
echo "This is the working directory: $PWD"
|
|
||||||
# Ensure the script is executable
|
|
||||||
chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
|
|
||||||
# Run Scalene
|
|
||||||
poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py
|
|
||||||
|
|
||||||
# Run profiler on head branch
|
|
||||||
# - name: Run profiler on head branch
|
|
||||||
# env:
|
# env:
|
||||||
# HEAD_SHA: ${{ env.HEAD_SHA }}
|
# BASE_SHA: ${{ env.BASE_SHA }}
|
||||||
# run: |
|
# run: |
|
||||||
# echo "Profiling the head branch for code_graph_pipeline.py"
|
# echo "Profiling the base branch for code_graph_pipeline.py"
|
||||||
# echo "Checking out head SHA: $HEAD_SHA"
|
# echo "Checking out base SHA: $BASE_SHA"
|
||||||
# git checkout $HEAD_SHA
|
# git checkout $BASE_SHA
|
||||||
# echo "This is the working directory: $PWD"
|
# echo "This is the working directory: $PWD"
|
||||||
# # Ensure the script is executable
|
# # Ensure the script is executable
|
||||||
# chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
|
# chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
|
||||||
# # Run Scalene
|
# # Run Scalene
|
||||||
# poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py
|
# poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py
|
||||||
|
|
||||||
|
# Run profiler on head branch
|
||||||
|
- name: Run profiler on head branch
|
||||||
|
env:
|
||||||
|
HEAD_SHA: ${{ env.HEAD_SHA }}
|
||||||
|
run: |
|
||||||
|
echo "Profiling the head branch for code_graph_pipeline.py"
|
||||||
|
echo "Checking out head SHA: $HEAD_SHA"
|
||||||
|
git checkout $HEAD_SHA
|
||||||
|
echo "This is the working directory: $PWD"
|
||||||
|
# Ensure the script is executable
|
||||||
|
chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
|
||||||
|
# Run pyinstrument
|
||||||
|
poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py
|
||||||
|
|
||||||
# # Compare profiling results
|
# # Compare profiling results
|
||||||
# - name: Compare profiling results
|
# - name: Compare profiling results
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,10 @@ on:
|
||||||
description: "Location of example script to run"
|
description: "Location of example script to run"
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
|
arguments:
|
||||||
|
description: "Arguments for example script"
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
secrets:
|
secrets:
|
||||||
GRAPHISTRY_USERNAME:
|
GRAPHISTRY_USERNAME:
|
||||||
required: true
|
required: true
|
||||||
|
|
@ -53,4 +57,4 @@ jobs:
|
||||||
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
|
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
|
||||||
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
|
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
|
||||||
run: poetry run python ${{ inputs.example-location }}
|
run: poetry run python ${{ inputs.example-location }} ${{ inputs.arguments }}
|
||||||
|
|
|
||||||
22
.github/workflows/test_code_graph_example.yml
vendored
Normal file
22
.github/workflows/test_code_graph_example.yml
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
# Runs the code graph example script through the shared reusable example workflow.
name: test | code graph example

on:
  workflow_dispatch:
  pull_request:
    types:
      - labeled
      - synchronize

# One run per workflow and pull request (or ref); a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  run_simple_example_test:
    uses: ./.github/workflows/reusable_python_example.yml
    with:
      example-location: ./examples/python/code_graph_example.py
      # Passed through to the example script on its command line.
      arguments: "--repo_path ./evals"
    secrets:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
|
||||||
32
Dockerfile_modal
Normal file
32
Dockerfile_modal
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
FROM python:3.11-slim

# Runtime configuration for the image. PYTHONPATH is set once here
# (it was previously declared a second time further down).
ENV PIP_NO_CACHE_DIR=true \
    PATH="${PATH}:/root/.poetry/bin" \
    PYTHONPATH=/app \
    RUN_MODE=modal \
    SKIP_MIGRATIONS=true

# System dependencies. --no-install-recommends keeps optional extras out of
# the image, and the apt lists are removed in the same layer that fetched them.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    gcc \
    git \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only the dependency manifests first so the install layers below are
# reused from cache until pyproject.toml or poetry.lock changes.
COPY pyproject.toml poetry.lock /app/

RUN pip install poetry

RUN poetry install --all-extras --no-root --without dev

# Application code goes last: it changes most often.
COPY cognee/ /app/cognee
COPY README.md /app/README.md
|
||||||
22
README.md
22
README.md
|
|
@ -241,6 +241,28 @@ Please see the cognee [Development Guide](https://docs.cognee.ai/quickstart/) fo
|
||||||
```bash
|
```bash
|
||||||
pip install cognee
|
pip install cognee
|
||||||
```
|
```
|
||||||
|
### Deployment at Scale (Modal)
|
||||||
|
|
||||||
|
Scale cognee in 4(+1) simple steps to handle enterprise workloads using [Modal](https://modal.com)'s GPU-powered infrastructure.
|
||||||
|
|
||||||
|
**1. Install the modal python client**
|
||||||
|
```bash
|
||||||
|
pip install modal
|
||||||
|
```
|
||||||
|
**2. Create a free account on [Modal](https://modal.com)**
|
||||||
|
|
||||||
|
**3. Set Up Modal API Key**
|
||||||
|
```bash
|
||||||
|
modal token set --token-id TOKEN_ID --token-secret TOKEN_SECRET --profile=PROFILE
|
||||||
|
modal profile activate PROFILE
|
||||||
|
```
|
||||||
|
**4. Run cognee example**
|
||||||
|
|
||||||
|
This simple example will deploy separate cognee instances building their own memory stores and answering a list of questions at scale.
|
||||||
|
```bash
|
||||||
|
modal run -d modal_deployment.py
|
||||||
|
```
|
||||||
|
**5. Change the `modal_deployment.py` script and develop your own AI memory at scale 🚀**
|
||||||
|
|
||||||
## 💫 Contributors
|
## 💫 Contributors
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
from .server import mcp
|
from .server import mcp
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main entry point for the package."""
|
"""Main entry point for the package."""
|
||||||
mcp.run(transport="stdio")
|
mcp.run(transport="stdio")
|
||||||
|
|
|
||||||
|
|
@ -4,9 +4,9 @@ from mcp.client.stdio import stdio_client
|
||||||
|
|
||||||
# Create server parameters for stdio connection
|
# Create server parameters for stdio connection
|
||||||
server_params = StdioServerParameters(
|
server_params = StdioServerParameters(
|
||||||
command="mcp", # Executable
|
command="mcp", # Executable
|
||||||
args=["run", "src/server.py"], # Optional command line arguments
|
args=["run", "src/server.py"], # Optional command line arguments
|
||||||
env=None # Optional environment variables
|
env=None, # Optional environment variables
|
||||||
)
|
)
|
||||||
|
|
||||||
text = """
|
text = """
|
||||||
|
|
@ -27,6 +27,7 @@ brain, that can ‘learn’ from available data and make increasingly
|
||||||
more accurate classifications or predictions over time.
|
more accurate classifications or predictions over time.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
async def run():
|
async def run():
|
||||||
async with stdio_client(server_params) as (read, write):
|
async with stdio_client(server_params) as (read, write):
|
||||||
async with ClientSession(read, write, timedelta(minutes=3)) as session:
|
async with ClientSession(read, write, timedelta(minutes=3)) as session:
|
||||||
|
|
@ -37,6 +38,8 @@ async def run():
|
||||||
|
|
||||||
print(f"Cognify result: {toolResult}")
|
print(f"Cognify result: {toolResult}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
asyncio.run(run())
|
asyncio.run(run())
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import cognee
|
import cognee
|
||||||
import importlib.util
|
import importlib.util
|
||||||
|
|
||||||
# from PIL import Image as PILImage
|
# from PIL import Image as PILImage
|
||||||
from mcp.server.fastmcp import FastMCP
|
from mcp.server.fastmcp import FastMCP
|
||||||
from cognee.api.v1.search import SearchType
|
from cognee.api.v1.search import SearchType
|
||||||
|
|
@ -8,6 +9,7 @@ from cognee.shared.data_models import KnowledgeGraph
|
||||||
|
|
||||||
mcp = FastMCP("cognee", timeout=120000)
|
mcp = FastMCP("cognee", timeout=120000)
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def cognify(text: str, graph_model_file: str = None, graph_model_name: str = None) -> str:
|
async def cognify(text: str, graph_model_file: str = None, graph_model_name: str = None) -> str:
|
||||||
"""Build knowledge graph from the input text"""
|
"""Build knowledge graph from the input text"""
|
||||||
|
|
@ -19,9 +21,9 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str
|
||||||
await cognee.add(text)
|
await cognee.add(text)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await cognee.cognify(graph_model=graph_model)
|
await cognee.cognify(graph_model=graph_model)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(f"Failed to cognify: {str(e)}")
|
raise ValueError(f"Failed to cognify: {str(e)}")
|
||||||
|
|
||||||
return "Ingested"
|
return "Ingested"
|
||||||
|
|
||||||
|
|
@ -57,7 +59,6 @@ async def prune() -> str:
|
||||||
# raise ValueError(f"Failed to create visualization: {str(e)}")
|
# raise ValueError(f"Failed to create visualization: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def node_to_string(node):
|
def node_to_string(node):
|
||||||
node_data = ", ".join(
|
node_data = ", ".join(
|
||||||
[f'{key}: "{value}"' for key, value in node.items() if key in ["id", "name"]]
|
[f'{key}: "{value}"' for key, value in node.items() if key in ["id", "name"]]
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@ from cognee.modules.data.methods import get_datasets
|
||||||
from cognee.modules.data.methods.get_dataset_data import get_dataset_data
|
from cognee.modules.data.methods.get_dataset_data import get_dataset_data
|
||||||
from cognee.modules.data.methods.get_datasets_by_name import get_datasets_by_name
|
from cognee.modules.data.methods.get_datasets_by_name import get_datasets_by_name
|
||||||
from cognee.modules.data.models import Data
|
from cognee.modules.data.models import Data
|
||||||
from cognee.modules.data.operations.write_metadata import write_metadata
|
|
||||||
from cognee.modules.ingestion.data_types import BinaryData
|
from cognee.modules.ingestion.data_types import BinaryData
|
||||||
from cognee.modules.users.methods import get_default_user
|
from cognee.modules.users.methods import get_default_user
|
||||||
from cognee.shared.CodeGraphEntities import Repository
|
from cognee.shared.CodeGraphEntities import Repository
|
||||||
|
|
|
||||||
94
modal_deployment.py
Normal file
94
modal_deployment.py
Normal file
|
|
@ -0,0 +1,94 @@
|
||||||
|
import modal
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
import cognee
|
||||||
|
import signal
|
||||||
|
|
||||||
|
from cognee.api.v1.search import SearchType
|
||||||
|
from cognee.shared.utils import setup_logging
|
||||||
|
|
||||||
|
app = modal.App("cognee-runner")

# Forward only the variables that are actually set in the local environment.
# os.getenv() returns None for a missing variable, and None is not a usable
# environment value, so unset names are left out instead of being passed on.
_forwarded_env = {
    name: os.environ[name] for name in ("ENV", "LLM_API_KEY") if name in os.environ
}

# Image definition: start from Dockerfile_modal, add the dependency manifests,
# apply the forwarded environment, then install the Python dependencies.
image = (
    modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
    .copy_local_file("pyproject.toml", "pyproject.toml")
    .copy_local_file("poetry.lock", "poetry.lock")
    .env(_forwarded_env)
    .poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
    .pip_install("protobuf", "h2")
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.function(image=image, concurrency_limit=10)
async def entry(text: str, query: str):
    """Ingest ``text`` into a freshly pruned cognee store and run ``query`` against it.

    Returns a dict with the keys ``"text"``, ``"query"`` and ``"answer"``.
    ``"answer"`` is the first search result, or ``None`` when the search
    returned nothing.
    """
    setup_logging(logging.ERROR)

    # Clear existing data and system state (including metadata) before ingesting.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    await cognee.add(text)
    await cognee.cognify()

    hits = await cognee.search(SearchType.GRAPH_COMPLETION, query_text=query)
    if hits:
        answer = hits[0]
    else:
        answer = None

    return {"text": text, "query": query, "answer": answer}
|
||||||
|
|
||||||
|
|
||||||
|
@app.local_entrypoint()
async def main():
    """Call ``entry`` once per sample (text, query) pair and print every result.

    All calls are created up front via ``entry.remote.aio`` and awaited together
    with ``asyncio.gather``; results are printed in the order of the samples.
    """
    # Each sample is a (text to ingest, question to ask about it) pair.
    samples = (
        (
            "NASA's Artemis program aims to return humans to the Moon by 2026, focusing on sustainable exploration and preparing for future Mars missions.",
            "When does NASA plan to return humans to the Moon under the Artemis program?",
        ),
        (
            "According to a 2022 UN report, global food waste amounts to approximately 931 million tons annually, with households contributing 61% of the total.",
            "How much food waste do households contribute annually according to the 2022 UN report?",
        ),
        (
            "The 2021 census data revealed that Tokyo's population reached 14 million, reflecting a 2.1% increase compared to the previous census conducted in 2015.",
            "What was Tokyo's population according to the 2021 census data?",
        ),
        (
            "A recent study published in the Journal of Nutrition found that consuming 30 grams of almonds daily can lower LDL cholesterol levels by 7% over a 12-week period.",
            "How much can daily almond consumption lower LDL cholesterol according to the study?",
        ),
        (
            "Amazon's Prime membership grew to 200 million subscribers in 2023, marking a 10% increase from the previous year, driven by exclusive content and faster delivery options.",
            "How many Prime members did Amazon have in 2023?",
        ),
        (
            "A new report by the International Energy Agency states that global renewable energy capacity increased by 295 gigawatts in 2022, primarily driven by solar and wind power expansion.",
            "By how much did global renewable energy capacity increase in 2022 according to the report?",
        ),
        (
            "The World Health Organization reported in 2023 that the global life expectancy has risen to 73.4 years, an increase of 5.5 years since the year 2000.",
            "What is the current global life expectancy according to the WHO's 2023 report?",
        ),
        (
            "The FIFA World Cup 2022 held in Qatar attracted a record-breaking audience of 5 billion people across various digital and traditional broadcasting platforms.",
            "How many people watched the FIFA World Cup 2022?",
        ),
        (
            "The European Space Agency's JUICE mission, launched in 2023, aims to explore Jupiter's icy moons, including Ganymede, Europa, and Callisto, over the next decade.",
            "Which moons is the JUICE mission set to explore?",
        ),
        (
            "According to a report by the International Labour Organization, the global unemployment rate in 2023 was estimated at 5.4%, reflecting a slight decrease compared to the previous year.",
            "What was the global unemployment rate in 2023 according to the ILO?",
        ),
    )

    pending = [entry.remote.aio(passage, question) for passage, question in samples]
    results = await asyncio.gather(*pending)

    print("\nFinal Results:")
    for outcome in results:
        print(outcome)
        print("----")

    # Send SIGTERM to this process once everything has been printed.
    # NOTE(review): presumably a workaround so the local runner exits; confirm.
    os.kill(os.getpid(), signal.SIGTERM)
|
||||||
3
poetry.lock
generated
3
poetry.lock
generated
|
|
@ -9033,6 +9033,7 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
|
||||||
type = ["pytest-mypy"]
|
type = ["pytest-mypy"]
|
||||||
|
|
||||||
[extras]
|
[extras]
|
||||||
|
codegraph = ["jedi", "parso"]
|
||||||
deepeval = ["deepeval"]
|
deepeval = ["deepeval"]
|
||||||
docs = ["unstructured"]
|
docs = ["unstructured"]
|
||||||
falkordb = ["falkordb"]
|
falkordb = ["falkordb"]
|
||||||
|
|
@ -9052,4 +9053,4 @@ weaviate = ["weaviate-client"]
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.10.0,<3.13"
|
python-versions = ">=3.10.0,<3.13"
|
||||||
content-hash = "585d4ecc16fcc18370d9729046baef7b3b02f92a4860b7f7f7be2d1a26654127"
|
content-hash = "ddc6f5406ee0205107a278cf46918b082d12dbc51471ff6464011731cfd41890"
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,8 @@ httpx = "0.27.0"
|
||||||
bokeh="^3.6.2"
|
bokeh="^3.6.2"
|
||||||
nltk = "3.9.1"
|
nltk = "3.9.1"
|
||||||
google-generativeai = {version = "^0.8.4", optional = true}
|
google-generativeai = {version = "^0.8.4", optional = true}
|
||||||
|
parso = {version = "^0.8.4", optional = true}
|
||||||
|
jedi = {version = "^0.19.2", optional = true}
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.extras]
|
[tool.poetry.extras]
|
||||||
|
|
@ -96,6 +98,7 @@ falkordb = ["falkordb"]
|
||||||
groq = ["groq"]
|
groq = ["groq"]
|
||||||
milvus = ["pymilvus"]
|
milvus = ["pymilvus"]
|
||||||
docs = ["unstructured"]
|
docs = ["unstructured"]
|
||||||
|
codegraph = ["parso", "jedi"]
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
pytest = "^7.4.0"
|
pytest = "^7.4.0"
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue