diff --git a/.github/workflows/vector_db_tests.yml b/.github/workflows/vector_db_tests.yml
index 65b70abe5..6e9e34493 100644
--- a/.github/workflows/vector_db_tests.yml
+++ b/.github/workflows/vector_db_tests.yml
@@ -103,6 +103,55 @@ jobs:
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
         run: uv run python ./cognee/tests/test_pgvector.py
 
+  run-pgvector-multi-user-tests:  # runs test_permissions.py against a pgvector service container with access control enabled
+    name: PGVector Multi-User Tests
+    runs-on: ubuntu-22.04
+    if: ${{ inputs.databases == 'all' || contains(inputs.databases, 'postgres') }}
+    services:
+      postgres:
+        image: pgvector/pgvector:pg17
+        env:
+          POSTGRES_USER: cognee
+          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
+          POSTGRES_DB: cognee_db
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 5432:5432  # published on the runner so the test step can reach the service via 127.0.0.1
+    steps:
+      - name: Check out
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: ${{ inputs.python-version }}
+          extra-dependencies: "postgres"
+
+      - name: Run PGVector Permissions Tests
+        env:
+          ENV: 'dev'
+          ENABLE_BACKEND_ACCESS_CONTROL: 'true'
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+          VECTOR_DB_URL: 127.0.0.1  # host only; the port is supplied separately below
+          VECTOR_DB_PORT: 5432
+          VECTOR_DB_USERNAME: cognee  # same value as POSTGRES_USER of the service above
+          VECTOR_DB_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}  # same secret the service container is started with
+          VECTOR_DATASET_DATABASE_HANDLER: pgvector  # NOTE(review): this diff renames the handler registry key to "pgvector_local" - confirm "pgvector" still resolves
+        run: uv run python ./cognee/tests/test_permissions.py
+
   run-lancedb-tests:
     name: LanceDB Tests
     runs-on: ubuntu-22.04
diff --git a/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py
b/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py
index 1bac5c4ef..5ba1a91a1 100644
--- a/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py
+++ b/cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py
@@ -17,7 +17,7 @@ supported_dataset_database_handlers = {
         "handler_provider": "neo4j",
     },
     "lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"},
-    "pgvector": {
+    "pgvector_local": {  # NOTE(review): create_dataset() still carries "vector_dataset_database_handler": "pgvector" - confirm lookups use this new key
         "handler_instance": PGVectorDatasetDatabaseHandler,
         "handler_provider": "pgvector",
     },
diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py
index a1834688b..6e5e23a5a 100644
--- a/cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py
+++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py
@@ -13,35 +13,6 @@ class PGVectorDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
     Handler for interacting with PGVector Dataset databases.
     """
 
-    @classmethod
-    async def _create_pg_database(cls, vector_config):
-        """
-        Create the necessary Postgres database, and the PGVector extension on it.
-        This is defined here because the creation needs the latest vector config,
-        which is not yet saved in the vector config context variable here.
-        """
-        from cognee.infrastructure.databases.relational.create_relational_engine import (
-            create_relational_engine,
-        )
-
-        from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine
-        from sqlalchemy import text
-
-        pg_relational_engine = create_relational_engine(
-            db_path="",
-            db_host=vector_config["vector_db_url"],
-            db_name=vector_config["vector_db_name"],
-            db_port=vector_config["vector_db_port"],
-            db_username=vector_config["vector_db_username"],
-            db_password=vector_config["vector_db_password"],
-            db_provider="postgres",
-        )
-        await pg_relational_engine.create_database()
-
-        vector_engine = create_vector_engine(**vector_config)
-        async with vector_engine.engine.begin() as connection:
-            await connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
-
     @classmethod
     async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
         vector_config = get_vectordb_config()
@@ -65,7 +36,9 @@ class PGVectorDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
             "vector_dataset_database_handler": "pgvector",
         }
 
-        await cls._create_pg_database(
+        from .create_db_and_tables import create_pg_database  # module-level helper defined in create_db_and_tables.py
+
+        await create_pg_database(
             {
                 "vector_db_provider": new_vector_config["vector_database_provider"],
                 "vector_db_url": new_vector_config["vector_database_url"],
diff --git a/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py b/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py
index c2477086d..e1d46984e 100644
--- a/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py
+++ b/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py
@@ -10,3 +10,34 @@ async def create_db_and_tables():
     if vector_config["vector_db_provider"] == "pgvector":
         async with vector_engine.engine.begin() as connection:
             await connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
+
+
+async def create_pg_database(vector_config):
+    """
+    Create the Postgres database named in vector_config and enable the PGVector extension on it.
+    The config is passed in explicitly because the creation needs the latest vector config,
+    which is not yet saved in the vector config context variable.
+
+    TODO: Consider merging this with create_db_and_tables(); they were kept separate for simplicity.
+    """
+    from cognee.infrastructure.databases.relational.create_relational_engine import (
+        create_relational_engine,
+    )
+
+    from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine
+    from sqlalchemy import text
+
+    pg_relational_engine = create_relational_engine(  # relational engine built from the vector DB's host, name, port and credentials
+        db_path="",
+        db_host=vector_config["vector_db_url"],
+        db_name=vector_config["vector_db_name"],
+        db_port=vector_config["vector_db_port"],
+        db_username=vector_config["vector_db_username"],
+        db_password=vector_config["vector_db_password"],
+        db_provider="postgres",
+    )
+    await pg_relational_engine.create_database()  # runs before the extension is enabled through the vector engine below
+
+    vector_engine = create_vector_engine(**vector_config)  # every key in vector_config is forwarded as a keyword argument
+    async with vector_engine.engine.begin() as connection:
+        await connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))  # IF NOT EXISTS: no error when the extension is already installed