diff --git a/.github/workflows/db_examples_tests.yml b/.github/workflows/db_examples_tests.yml index b3ca7ce58..f1613e493 100644 --- a/.github/workflows/db_examples_tests.yml +++ b/.github/workflows/db_examples_tests.yml @@ -33,10 +33,7 @@ on: required: false QDRANT_API_KEY: required: false - WEAVIATE_API_URL: - required: false - WEAVIATE_API_KEY: - required: false + POSTGRES_PASSWORD: required: false NEO4J_API_URL: @@ -116,38 +113,7 @@ jobs: run: | poetry run python examples/database_examples/kuzu_example.py - run-db-example-weaviate: - name: "Weaviate DB Example Test" - runs-on: ubuntu-22.04 - if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'weaviate') }} - steps: - - name: Check out - uses: actions/checkout@v4 - - name: Cognee Setup - uses: ./.github/actions/cognee_setup - with: - python-version: ${{ inputs.python-version }} - - - name: Install Weaviate extra - run: | - poetry install -E weaviate - - - name: Run Weaviate Example - env: - ENV: dev - LLM_MODEL: ${{ secrets.LLM_MODEL }} - LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} - LLM_API_KEY: ${{ secrets.LLM_API_KEY }} - LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} - EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} - EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} - EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} - EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - VECTOR_DB_URL: ${{ secrets.WEAVIATE_API_URL }} - VECTOR_DB_KEY: ${{ secrets.WEAVIATE_API_KEY }} - run: | - poetry run python examples/database_examples/weaviate_example.py run-db-example-qdrant: name: "Qdrant DB Example Test" diff --git a/.github/workflows/graph_db_tests.yml b/.github/workflows/graph_db_tests.yml index 94ff639ba..379539e14 100644 --- a/.github/workflows/graph_db_tests.yml +++ b/.github/workflows/graph_db_tests.yml @@ -11,11 +11,6 @@ on: type: string default: "all" description: "Which vector databases to test (comma-separated list or 'all')" - secrets: - WEAVIATE_API_URL: - required: false - WEAVIATE_API_KEY: - required: false jobs: run-kuzu-tests: diff --git a/.github/workflows/search_db_tests.yml b/.github/workflows/search_db_tests.yml index bae781611..3f4bbf1ca 100644 --- a/.github/workflows/search_db_tests.yml +++ b/.github/workflows/search_db_tests.yml @@ -11,11 +11,6 @@ on: type: string default: "all" description: "Which vector databases to test (comma-separated list or 'all')" - secrets: - WEAVIATE_API_URL: - required: false - WEAVIATE_API_KEY: - required: false jobs: run-kuzu-lance-sqlite-search-tests: diff --git a/.github/workflows/vector_db_tests.yml b/.github/workflows/vector_db_tests.yml index 32abd468a..b68be4102 100644 --- a/.github/workflows/vector_db_tests.yml +++ b/.github/workflows/vector_db_tests.yml @@ -9,10 +9,7 @@ on: default: "all" description: "Which vector databases to test (comma-separated list or 'all')" secrets: - WEAVIATE_API_URL: - required: false - WEAVIATE_API_KEY: - required: false + POSTGRES_PASSWORD: required: false @@ -61,39 +58,6 @@ jobs: # EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} # run: poetry run python ./cognee/tests/test_chromadb.py - run-weaviate-tests: - name: Weaviate Tests - runs-on: ubuntu-22.04 - if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'weaviate') }} - steps: - - name: Check out - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Cognee Setup - uses: ./.github/actions/cognee_setup - with: - python-version: ${{ inputs.python-version }} - - - name: Install specific db dependency - run: | - poetry install -E weaviate - - - name: Run Weaviate Tests - env: - ENV: 'dev' - LLM_MODEL: ${{ secrets.LLM_MODEL }} - LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }} - LLM_API_KEY: ${{ secrets.LLM_API_KEY }} - LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }} - EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }} - EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }} - EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }} - EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }} - VECTOR_DB_URL: ${{ secrets.WEAVIATE_API_URL }} - VECTOR_DB_KEY: ${{ secrets.WEAVIATE_API_KEY }} - run: poetry run python ./cognee/tests/test_weaviate.py run_qdrant_integration_test: name: Qdrant Tests diff --git a/Dockerfile b/Dockerfile index e0cb64e20..77bfdff11 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,7 +31,7 @@ COPY README.md pyproject.toml uv.lock entrypoint.sh ./ # Install the project's dependencies using the lockfile and settings RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable + uv sync --extra debug --extra api --extra postgres --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable # Copy Alembic configuration COPY alembic.ini /app/alembic.ini @@ -41,7 +41,7 @@ COPY alembic/ /app/alembic # Installing separately from its dependencies allows optimal layer caching COPY ./cognee /app/cognee RUN --mount=type=cache,target=/root/.cache/uv \ -uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable +uv sync --extra debug --extra api --extra postgres --extra qdrant --extra neo4j --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable FROM python:3.12-slim-bookworm diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 67b45c484..8bbceaf7f 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -19,7 +19,7 @@ def create_vector_engine( for each provider, raising an EnvironmentError if any are missing, or ImportError if the ChromaDB package is not installed. - Supported providers include: Weaviate, Qdrant, pgvector, FalkorDB, ChromaDB, and + Supported providers include: Qdrant, pgvector, FalkorDB, ChromaDB, and LanceDB. Parameters: @@ -30,7 +30,7 @@ def create_vector_engine( providers. - vector_db_key (str): The API key or access token for the vector database instance. - vector_db_provider (str): The name of the vector database provider to use (e.g., - 'weaviate', 'qdrant'). + 'qdrant', 'pgvector'). Returns: -------- @@ -48,15 +48,7 @@ def create_vector_engine( embedding_engine=embedding_engine, ) - if vector_db_provider == "weaviate": - from .weaviate_db import WeaviateAdapter - - if not (vector_db_url and vector_db_key): - raise EnvironmentError("Missing requred Weaviate credentials!") - - return WeaviateAdapter(vector_db_url, vector_db_key, embedding_engine=embedding_engine) - - elif vector_db_provider == "qdrant": + if vector_db_provider == "qdrant": if not (vector_db_url and vector_db_key): raise EnvironmentError("Missing requred Qdrant credentials!") diff --git a/cognee/modules/settings/get_settings.py b/cognee/modules/settings/get_settings.py index c7fe3aed2..cd09549d9 100644 --- a/cognee/modules/settings/get_settings.py +++ b/cognee/modules/settings/get_settings.py @@ -43,10 +43,6 @@ def get_settings() -> SettingsDict: llm_config = get_llm_config() vector_dbs = [ - { - "value": "weaviate", - "label": "Weaviate", - }, { "value": "qdrant", "label": "Qdrant", diff --git a/cognee/modules/settings/save_vector_db_config.py b/cognee/modules/settings/save_vector_db_config.py index 44b917cf7..3f7d0941a 100644 --- a/cognee/modules/settings/save_vector_db_config.py +++ b/cognee/modules/settings/save_vector_db_config.py @@ -6,7 +6,7 @@ from cognee.infrastructure.databases.vector import get_vectordb_config class VectorDBConfig(BaseModel): url: str api_key: str - provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]] + provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["pgvector"]] async def save_vector_db_config(vector_db_config: VectorDBConfig): diff --git a/cognee/tests/test_weaviate.py b/cognee/tests/test_weaviate.py deleted file mode 100644 index 9a9be82eb..000000000 --- a/cognee/tests/test_weaviate.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -import pathlib -import cognee -from cognee.infrastructure.files.storage import get_storage_config -from cognee.modules.search.operations import get_history -from cognee.modules.users.methods import get_default_user -from cognee.shared.logging_utils import get_logger -from cognee.modules.search.types import SearchType - -logger = get_logger() - - -async def main(): - cognee.config.set_vector_db_provider("weaviate") - data_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_weaviate") - ).resolve() - ) - cognee.config.data_root_directory(data_directory_path) - cognee_directory_path = str( - pathlib.Path( - os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_weaviate") - ).resolve() - ) - cognee.config.system_root_directory(cognee_directory_path) - - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - - dataset_name = "cs_explanations" - - explanation_file_path = os.path.join( - pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt" - ) - await cognee.add([explanation_file_path], dataset_name) - - text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena. - At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states. - Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible. - The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly. - Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate. - In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited. - """ - - await cognee.add([text], dataset_name) - - await cognee.cognify([dataset_name]) - - from cognee.infrastructure.databases.vector import get_vector_engine - - vector_engine = get_vector_engine() - random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0] - random_node_name = random_node.payload["text"] - - search_results = await cognee.search( - query_text=random_node_name, query_type=SearchType.INSIGHTS - ) - assert len(search_results) != 0, "The search results list is empty." - print("\n\nExtracted sentences are:\n") - for result in search_results: - print(f"{result}\n") - - search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name) - assert len(search_results) != 0, "The search results list is empty." - print("\n\nExtracted chunks are:\n") - for result in search_results: - print(f"{result}\n") - - search_results = await cognee.search( - query_type=SearchType.SUMMARIES, query_text=random_node_name - ) - assert len(search_results) != 0, "Query related summaries don't exist." - print("\nExtracted summaries are:\n") - for result in search_results: - print(f"{result}\n") - - user = await get_default_user() - history = await get_history(user.id) - assert len(history) == 6, "Search history is not correct." - - await cognee.prune.prune_data() - data_root_directory = get_storage_config()["data_root_directory"] - assert not os.path.isdir(data_root_directory), "Local data files are not deleted" - - await cognee.prune.prune_system(metadata=True) - collections = await get_vector_engine().client.collections.list_all() - assert len(collections) == 0, "Weaviate vector database is not empty" - - -if __name__ == "__main__": - import asyncio - - asyncio.run(main()) diff --git a/deployment/helm/Dockerfile b/deployment/helm/Dockerfile index 6641c70c5..9d7f74653 100644 --- a/deployment/helm/Dockerfile +++ b/deployment/helm/Dockerfile @@ -3,7 +3,7 @@ FROM python:3.11-slim # Define Poetry extras to install ARG POETRY_EXTRAS="\ # Storage & Databases \ -postgres weaviate qdrant neo4j falkordb kuzu \ +postgres qdrant neo4j falkordb kuzu \ # Notebooks & Interactive Environments \ notebook \ # LLM & AI Frameworks \ diff --git a/examples/database_examples/weaviate_example.py b/examples/database_examples/weaviate_example.py deleted file mode 100644 index b14cc3c97..000000000 --- a/examples/database_examples/weaviate_example.py +++ /dev/null @@ -1,92 +0,0 @@ -import os -import pathlib -import asyncio -import cognee -from cognee.modules.search.types import SearchType - - -async def main(): - """ - Example script demonstrating how to use Cognee with Weaviate - - This example: - 1. Configures Cognee to use Weaviate as vector database - 2. Sets up data directories - 3. Adds sample data to Cognee - 4. Processes (cognifies) the data - 5. Performs different types of searches - """ - - # Set up Weaviate credentials in .env file and get the values from environment variables - weaviate_url = os.getenv("VECTOR_DB_URL") - weaviate_key = os.getenv("VECTOR_DB_KEY") - - # Configure Weaviate as the vector database provider - cognee.config.set_vector_db_config( - { - "vector_db_url": weaviate_url, # Set your Weaviate Endpoint - "vector_db_key": weaviate_key, # Set your Weaviate API key - "vector_db_provider": "weaviate", # Specify Weaviate as provider - } - ) - - # Set up data directories for storing documents and system files - # You should adjust these paths to your needs - current_dir = pathlib.Path(__file__).parent - data_directory_path = str(current_dir / "data_storage") - cognee.config.data_root_directory(data_directory_path) - - cognee_directory_path = str(current_dir / "cognee_system") - cognee.config.system_root_directory(cognee_directory_path) - - # Clean any existing data (optional) - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - - # Create a dataset - dataset_name = "weaviate_example" - - # Add sample text to the dataset - sample_text = """Weaviate is an open-source vector database that stores both objects and vectors. - It enables vector search with GraphQL-based filtering capabilities. - Weaviate can be deployed in the cloud, on-premise, or embedded in your application. - It allows users to search through vectors using different algorithms and metrics. - Weaviate supports various modules for text2vec transformations, including BERT, OpenAI, and other models. - It can index data in multiple ways and offers features like semantic search, classification, and contextualization.""" - - # Add the sample text to the dataset - await cognee.add([sample_text], dataset_name) - - # Process the added document to extract knowledge - await cognee.cognify([dataset_name]) - - # Now let's perform some searches - # 1. Search for insights related to "Weaviate" - insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Weaviate") - print("\nInsights about Weaviate:") - for result in insights_results: - print(f"- {result}") - - # 2. Search for text chunks related to "vector search" - chunks_results = await cognee.search( - query_type=SearchType.CHUNKS, query_text="vector search", datasets=[dataset_name] - ) - print("\nChunks about vector search:") - for result in chunks_results: - print(f"- {result}") - - # 3. Get graph completion related to databases - graph_completion_results = await cognee.search( - query_type=SearchType.GRAPH_COMPLETION, query_text="database" - ) - print("\nGraph completion for databases:") - for result in graph_completion_results: - print(f"- {result}") - - # Clean up (optional) - # await cognee.prune.prune_data() - # await cognee.prune.prune_system(metadata=True) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/mypy.ini b/mypy.ini index 089fde35a..ba3dd40a4 100644 --- a/mypy.ini +++ b/mypy.ini @@ -74,8 +74,7 @@ ignore_missing_imports=true [mypy-prometheus_client.*] ignore_missing_imports=true -[mypy-weaviate.*] -ignore_missing_imports=true + [mypy-psycopg2cffi.*] ignore_missing_imports=true diff --git a/pyproject.toml b/pyproject.toml index c82b6ab58..b17200eec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ api = [ distributed = [ "modal>=1.0.5,<2.0.0", ] -weaviate = ["weaviate-client>=4.9.6,<5.0.0"] + qdrant = ["qdrant-client>=1.14.2,<2"] neo4j = ["neo4j>=5.28.0,<6"] postgres = [