diff --git a/cognee-starter-kit/.env.template b/cognee-starter-kit/.env.template deleted file mode 100644 index 1aefd2739..000000000 --- a/cognee-starter-kit/.env.template +++ /dev/null @@ -1,19 +0,0 @@ -# In case you choose to use OpenAI provider, just adjust the model and api_key. -LLM_API_KEY="" -LLM_MODEL="openai/gpt-5-mini" -LLM_PROVIDER="openai" -# Not needed if you use OpenAI -LLM_ENDPOINT="" -LLM_API_VERSION="" - -# In case you choose to use OpenAI provider, just adjust the model and api_key. -EMBEDDING_API_KEY="" -EMBEDDING_MODEL="openai/text-embedding-3-large" -EMBEDDING_PROVIDER="openai" -# Not needed if you use OpenAI -EMBEDDING_ENDPOINT="" -EMBEDDING_API_VERSION="" - - -GRAPHISTRY_USERNAME="" -GRAPHISTRY_PASSWORD="" \ No newline at end of file diff --git a/cognee-starter-kit/.gitignore b/cognee-starter-kit/.gitignore deleted file mode 100644 index c99e3a58e..000000000 --- a/cognee-starter-kit/.gitignore +++ /dev/null @@ -1,196 +0,0 @@ -.data -.env -.local.env -.prod.env -cognee/.data/ - -code_pipeline_output*/ - -*.lance/ -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -full_run.ipynb - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Cognee logs directory - keep directory, ignore contents -logs/* -!logs/.gitkeep -!logs/README.md - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.env.local -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -.idea/ - -.vscode/ -cognee/data/ -cognee/cache/ - -# Default cognee system directory, used in development -.cognee_system/ -.data_storage/ -.artifacts/ -.anon_id - -node_modules/ - -# Evals -SWE-bench_testsample/ - -# ChromaDB Data -.chromadb_data/ diff --git a/cognee-starter-kit/README.md b/cognee-starter-kit/README.md deleted file mode 100644 index c265e278e..000000000 --- a/cognee-starter-kit/README.md +++ /dev/null @@ -1,89 +0,0 @@ - -# Cognee Starter Kit -Welcome to the cognee Starter Repo! This repository is designed to help you get started quickly by providing a structured dataset and pre-built data pipelines using cognee to build powerful knowledge graphs. - -You can use this repo to ingest, process, and visualize data in minutes. - -By following this guide, you will: - -- Load structured company and employee data -- Utilize pre-built pipelines for data processing -- Perform graph-based search and query operations -- Visualize entity relationships effortlessly on a graph - -# How to Use This Repo 🛠 - -## Install uv if you don't have it on your system -``` -pip install uv -``` -## Install dependencies -``` -uv sync -``` - -## Setup LLM -Add environment variables to `.env` file. -In case you choose to use OpenAI provider, add just the model and api_key. -``` -LLM_PROVIDER="" -LLM_MODEL="" -LLM_ENDPOINT="" -LLM_API_KEY="" -LLM_API_VERSION="" - -EMBEDDING_PROVIDER="" -EMBEDDING_MODEL="" -EMBEDDING_ENDPOINT="" -EMBEDDING_API_KEY="" -EMBEDDING_API_VERSION="" -``` - -Activate the Python environment: -``` -source .venv/bin/activate -``` - -## Run the Default Pipeline - -This script runs the cognify pipeline with default settings. It ingests text data, builds a knowledge graph, and allows you to run search queries. - -``` -python src/pipelines/default.py -``` - -## Run the Low-Level Pipeline - -This script implements its own pipeline with custom ingestion task. It processes the given JSON data about companies and employees, making it searchable via a graph. - -``` -python src/pipelines/low_level.py -``` - -## Run the Custom Model Pipeline - -Custom model uses custom pydantic model for graph extraction. This script categorizes programming languages as an example and visualizes relationships. - -``` -python src/pipelines/custom-model.py -``` - -## Graph preview - -cognee provides a visualize_graph function that will render the graph for you. - -``` - graph_file_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".artifacts/graph_visualization.html") - ).resolve() - ) - await visualize_graph(graph_file_path) -``` - -# What will you build with cognee? - -- Expand the dataset by adding more structured/unstructured data -- Customize the data model to fit your use case -- Use the search API to build an intelligent assistant -- Visualize knowledge graphs for better insights diff --git a/cognee-starter-kit/pyproject.toml b/cognee-starter-kit/pyproject.toml deleted file mode 100644 index 42ae027c7..000000000 --- a/cognee-starter-kit/pyproject.toml +++ /dev/null @@ -1,11 +0,0 @@ -[project] -name = "cognee-starter" -version = "0.1.1" -description = "Starter project which can be harvested for parts" -readme = "README.md" - -requires-python = ">=3.10, <=3.13" - -dependencies = [ - "cognee>=0.1.38,<1.0.0", -] diff --git a/cognee-starter-kit/src/data/companies.json b/cognee-starter-kit/src/data/companies.json deleted file mode 100644 index c402ec802..000000000 --- a/cognee-starter-kit/src/data/companies.json +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "name": "TechNova Inc.", - "departments": [ - "Engineering", - "Marketing" - ] - }, - { - "name": "GreenFuture Solutions", - "departments": [ - "Research & Development", - "Sales", - "Customer Support" - ] - }, - { - "name": "Skyline Financials", - "departments": [ - "Accounting" - ] - }, - { - "name": "MediCare Plus", - "departments": [ - "Healthcare", - "Administration" - ] - }, - { - "name": "NextGen Robotics", - "departments": [ - "AI Development", - "Manufacturing", - "HR" - ] - } -] diff --git a/cognee-starter-kit/src/data/people.json b/cognee-starter-kit/src/data/people.json deleted file mode 100644 index 3474d8794..000000000 --- a/cognee-starter-kit/src/data/people.json +++ /dev/null @@ -1,52 +0,0 @@ -[ - { - "name": "John Doe", - "company": "TechNova Inc.", - "department": "Engineering" - }, - { - "name": "Jane Smith", - "company": "TechNova Inc.", - "department": "Marketing" - }, - { - "name": "Alice Johnson", - "company": "GreenFuture Solutions", - "department": "Sales" - }, - { - "name": "Bob Williams", - "company": "GreenFuture Solutions", - "department": "Customer Support" - }, - { - "name": "Michael Brown", - "company": "Skyline Financials", - "department": "Accounting" - }, - { - "name": "Emily Davis", - "company": "MediCare Plus", - "department": "Healthcare" - }, - { - "name": "David Wilson", - "company": "MediCare Plus", - "department": "Administration" - }, - { - "name": "Emma Thompson", - "company": "NextGen Robotics", - "department": "AI Development" - }, - { - "name": "Chris Martin", - "company": "NextGen Robotics", - "department": "Manufacturing" - }, - { - "name": "Sophia White", - "company": "NextGen Robotics", - "department": "HR" - } -] diff --git a/cognee-starter-kit/src/pipelines/default.py b/cognee-starter-kit/src/pipelines/default.py deleted file mode 100644 index e757ffc3f..000000000 --- a/cognee-starter-kit/src/pipelines/default.py +++ /dev/null @@ -1,67 +0,0 @@ -import os -import asyncio -import pathlib -from cognee import config, add, cognify, search, SearchType, prune, visualize_graph - - -async def main(): - data_directory_path = str( - pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage")).resolve() - ) - # Set up the data directory. Cognee will store files here. - config.data_root_directory(data_directory_path) - - cognee_directory_path = str( - pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system")).resolve() - ) - # Set up the Cognee system directory. Cognee will store system files and databases here. - config.system_root_directory(cognee_directory_path) - - # Prune data and system metadata before running, only if we want "fresh" state. - await prune.prune_data() - await prune.prune_system(metadata=True) - - text = "The Python programming language is widely used in data analysis, web development, and machine learning." - - # Add the text data to Cognee. - await add(text) - - # Cognify the text data. - await cognify() - - # Or use our simple graph preview - graph_file_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".artifacts/graph_visualization.html") - ).resolve() - ) - await visualize_graph(graph_file_path) - - # Completion query that uses graph data to form context. - graph_completion = await search( - query_text="What is python?", query_type=SearchType.GRAPH_COMPLETION - ) - print("Graph completion result is:") - print(graph_completion) - - # Completion query that uses document chunks to form context. - rag_completion = await search( - query_text="What is Python?", query_type=SearchType.RAG_COMPLETION - ) - print("Completion result is:") - print(rag_completion) - - # Query all summaries related to query. - summaries = await search(query_text="Python", query_type=SearchType.SUMMARIES) - print("Summary results are:") - for summary in summaries: - print(summary) - - chunks = await search(query_text="Python", query_type=SearchType.CHUNKS) - print("Chunk results are:") - for chunk in chunks: - print(chunk) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/cognee-starter-kit/src/pipelines/low_level.py b/examples/low_level/granular_level.py similarity index 98% rename from cognee-starter-kit/src/pipelines/low_level.py rename to examples/low_level/granular_level.py index 80f4a22e9..115b3b425 100644 --- a/cognee-starter-kit/src/pipelines/low_level.py +++ b/examples/low_level/granular_level.py @@ -49,12 +49,11 @@ class Company(DataPoint): ROOT = Path(__file__).resolve().parent -DATA_DIR = ROOT.parent / "data" COGNEE_DIR = ROOT / ".cognee_system" ARTIFACTS_DIR = ROOT / ".artifacts" GRAPH_HTML = ARTIFACTS_DIR / "graph_visualization.html" -COMPANIES_JSON = DATA_DIR / "companies.json" -PEOPLE_JSON = DATA_DIR / "people.json" +COMPANIES_JSON = ROOT / "companies.json" +PEOPLE_JSON = ROOT / "people.json" def load_json_file(path: Path) -> Any: diff --git a/cognee-starter-kit/src/pipelines/custom-model.py b/examples/python/custom-graph-model-example.py similarity index 100% rename from cognee-starter-kit/src/pipelines/custom-model.py rename to examples/python/custom-graph-model-example.py