<!-- .github/pull_request_template.md --> ## Description <!-- Provide a clear description of the changes in this PR --> # Add Support for ChromaDB ## Summary This PR adds support for ChromaDB as a vector database option in the Cognee application. ChromaDB is a modern, open-source embedding database designed for AI applications. ## Changes - Created a new ChromaDBAdapter implementation for vector database operations - Added comprehensive test suite for ChromaDB functionality - Updated docker-compose.yml to include ChromaDB service - Modified environment configuration to support ChromaDB settings - Updated vector engine creation logic to support ChromaDB as an option ## Technical Details - Implemented `ChromaDBAdapter.py` (347 lines) with full CRUD operations for vector data - Created test suite (`test_chromadb.py`) with 171 lines of test coverage - Updated vector engine creation process to dynamically select ChromaDB when configured - Modified settings router to accommodate new database option - Updated environment template with ChromaDB configuration options ## Docker Changes - Added ChromaDB service to docker-compose.yml with appropriate configuration This PR enhances Cognee's flexibility by providing an alternative vector database option, allowing users to choose the most appropriate database for their specific use case. ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin. Tested with UI + tests. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Expanded vector database integration by adding support for ChromaDB, enabling enhanced data management and search functionalities. - **Tests** - Added automated tests to validate the ChromaDB integration and related operations. - **Chores** - Updated configuration guidance and dependency management to include ChromaDB. 
- Provided an optional container deployment template for ChromaDB. - Added a new entry to ignore the `.chromadb_data/` directory in version control. - Introduced a new GitHub Actions workflow for testing ChromaDB integration. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: hajdul88 <52442977+hajdul88@users.noreply.github.com> Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
49 lines
1.1 KiB
Text
49 lines
1.1 KiB
Text
ENV="local"
|
|
TOKENIZERS_PARALLELISM="false"
|
|
|
|
# LLM Configuration
|
|
LLM_API_KEY=""
|
|
LLM_MODEL="openai/gpt-4o-mini"
|
|
LLM_PROVIDER="openai"
|
|
LLM_ENDPOINT=""
|
|
LLM_API_VERSION=""
|
|
LLM_MAX_TOKENS="16384"
|
|
|
|
GRAPHISTRY_USERNAME=
|
|
GRAPHISTRY_PASSWORD=
|
|
|
|
SENTRY_REPORTING_URL=
|
|
|
|
# Embedding Configuration
|
|
EMBEDDING_PROVIDER="openai"
|
|
EMBEDDING_API_KEY=""
|
|
EMBEDDING_MODEL="openai/text-embedding-3-large"
|
|
EMBEDDING_ENDPOINT=""
|
|
EMBEDDING_API_VERSION=""
|
|
EMBEDDING_DIMENSIONS=3072
|
|
EMBEDDING_MAX_TOKENS=8191
|
|
|
|
# Graph database provider: "neo4j" or "networkx"
|
|
GRAPH_DATABASE_PROVIDER="networkx"
|
|
# Not needed if using networkx
|
|
GRAPH_DATABASE_URL=
|
|
GRAPH_DATABASE_USERNAME=
|
|
GRAPH_DATABASE_PASSWORD=
|
|
|
|
# Vector database provider: "qdrant", "pgvector", "weaviate", "milvus", "lancedb" or "chromadb"
|
|
VECTOR_DB_PROVIDER="lancedb"
|
|
# Not needed if using "lancedb" or "pgvector"
|
|
VECTOR_DB_URL=
|
|
VECTOR_DB_KEY=
|
|
|
|
# Relational database provider: "sqlite" or "postgres"
|
|
DB_PROVIDER="sqlite"
|
|
|
|
# Database name
|
|
DB_NAME=cognee_db
|
|
|
|
# Postgres-specific parameters (only needed if Postgres or PGVector is used). Leave these commented out for cognee's simplest default setup of SQLite-NetworkX-LanceDB.
|
|
# DB_HOST=127.0.0.1
|
|
# DB_PORT=5432
|
|
# DB_USERNAME=cognee
|
|
# DB_PASSWORD=cognee
|