fix: various fixes for the deployment

* fix: remove groups from UserRead model

* fix: add missing system dependencies for postgres

* fix: change vector db provider environment variable name

* fix: WeaviateAdapter retrieve bug

* fix: correctly return data point objects from retrieve method

* fix: align graph object properties

* feat: add node example
This commit is contained in:
Boris 2024-10-22 11:26:48 +02:00 committed by GitHub
parent 9100de75f5
commit 2f832b190c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
27 changed files with 413 additions and 45 deletions

View file

@ -13,7 +13,7 @@ GRAPH_DATABASE_URL=
GRAPH_DATABASE_USERNAME= GRAPH_DATABASE_USERNAME=
GRAPH_DATABASE_PASSWORD= GRAPH_DATABASE_PASSWORD=
VECTOR_ENGINE_PROVIDER="qdrant" # or "weaviate" or "lancedb" VECTOR_DB_PROVIDER="qdrant" # or "weaviate" or "lancedb"
# Not needed if using "lancedb" # Not needed if using "lancedb"
VECTOR_DB_URL= VECTOR_DB_URL=
VECTOR_DB_KEY= VECTOR_DB_KEY=

2
.gitignore vendored
View file

@ -177,3 +177,5 @@ cognee/cache/
# Default cognee system directory, used in development # Default cognee system directory, used in development
.cognee_system/ .cognee_system/
.data_storage/ .data_storage/
node_modules/

View file

@ -8,9 +8,17 @@ ENV DEBUG=${DEBUG}
ENV PIP_NO_CACHE_DIR=true ENV PIP_NO_CACHE_DIR=true
ENV PATH="${PATH}:/root/.poetry/bin" ENV PATH="${PATH}:/root/.poetry/bin"
RUN apt-get update && apt-get install
RUN apt-get install -y \
gcc \
libpq-dev
WORKDIR /app WORKDIR /app
COPY pyproject.toml poetry.lock /app/ COPY pyproject.toml poetry.lock /app/
RUN pip install poetry RUN pip install poetry
# Don't create virtualenv since docker is already isolated # Don't create virtualenv since docker is already isolated
@ -18,15 +26,16 @@ RUN poetry config virtualenvs.create false
# Install the dependencies # Install the dependencies
RUN poetry install --no-root --no-dev RUN poetry install --no-root --no-dev
# Set the PYTHONPATH environment variable to include the /app directory # Set the PYTHONPATH environment variable to include the /app directory
ENV PYTHONPATH=/app ENV PYTHONPATH=/app
COPY cognee/ cognee/ COPY cognee/ /app/cognee
# Copy Alembic configuration # Copy Alembic configuration
COPY alembic.ini ./ COPY alembic.ini /app/alembic.ini
COPY alembic/ alembic/ COPY alembic/ /app/alembic
COPY entrypoint.sh /app/entrypoint.sh COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh RUN chmod +x /app/entrypoint.sh

View file

@ -92,6 +92,8 @@ if db_engine.engine.dialect.name == "sqlite":
db_config = get_relational_config() db_config = get_relational_config()
LocalStorage.ensure_directory_exists(db_config.db_path) LocalStorage.ensure_directory_exists(db_config.db_path)
print("Using database:", db_engine.db_uri)
config.set_section_option( config.set_section_option(
config.config_ini_section, config.config_ini_section,
"SQLALCHEMY_DATABASE_URI", "SQLALCHEMY_DATABASE_URI",

View file

@ -16,7 +16,7 @@ from cognee.modules.users.methods import create_default_user, delete_user
revision: str = '482cd6517ce4' revision: str = '482cd6517ce4'
down_revision: Union[str, None] = '8057ae7329c2' down_revision: Union[str, None] = '8057ae7329c2'
branch_labels: Union[str, Sequence[str], None] = None branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = "8057ae7329c2"
def upgrade() -> None: def upgrade() -> None:

View file

@ -43,13 +43,13 @@ async def lifespan(app: FastAPI):
# from cognee.modules.data.deletion import prune_system, prune_data # from cognee.modules.data.deletion import prune_system, prune_data
# await prune_data() # await prune_data()
# await prune_system(metadata = True) # await prune_system(metadata = True)
if app_environment == "local" or app_environment == "dev": # if app_environment == "local" or app_environment == "dev":
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
db_engine = get_relational_engine() db_engine = get_relational_engine()
await db_engine.create_database() await db_engine.create_database()
from cognee.modules.users.methods import get_default_user from cognee.modules.users.methods import get_default_user
await get_default_user() await get_default_user()
yield yield

View file

@ -21,7 +21,7 @@ class config():
graph_config.graph_file_path = os.path.join(databases_directory_path, "cognee.graph") graph_config.graph_file_path = os.path.join(databases_directory_path, "cognee.graph")
vector_config = get_vectordb_config() vector_config = get_vectordb_config()
if vector_config.vector_engine_provider == "lancedb": if vector_config.vector_db_provider == "lancedb":
vector_config.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb") vector_config.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
@staticmethod @staticmethod
@ -91,9 +91,9 @@ class config():
@staticmethod @staticmethod
def set_vector_engine_provider(vector_engine_provider: str): def set_vector_db_provider(vector_db_provider: str):
vector_db_config = get_vectordb_config() vector_db_config = get_vectordb_config()
vector_db_config.vector_engine_provider = vector_engine_provider vector_db_config.vector_db_provider = vector_db_provider
@staticmethod @staticmethod
def set_vector_db_key(db_key: str): def set_vector_db_key(db_key: str):

View file

@ -9,7 +9,7 @@ class VectorConfig(BaseSettings):
"cognee.lancedb" "cognee.lancedb"
) )
vector_db_key: str = "" vector_db_key: str = ""
vector_engine_provider: str = "lancedb" vector_db_provider: str = "lancedb"
model_config = SettingsConfigDict(env_file = ".env", extra = "allow") model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
@ -17,7 +17,7 @@ class VectorConfig(BaseSettings):
return { return {
"vector_db_url": self.vector_db_url, "vector_db_url": self.vector_db_url,
"vector_db_key": self.vector_db_key, "vector_db_key": self.vector_db_key,
"vector_db_provider": self.vector_engine_provider, "vector_db_provider": self.vector_db_provider,
} }
@lru_cache @lru_cache

View file

@ -108,11 +108,12 @@ class WeaviateAdapter(VectorDBInterface):
filters = Filter.by_id().contains_any(data_point_ids) filters = Filter.by_id().contains_any(data_point_ids)
) )
for data_point in data_points: for data_point in data_points.objects:
data_point.payload = data_point.properties data_point.payload = data_point.properties
data_point.id = data_point.uuid
del data_point.properties del data_point.properties
future.set_result(data_points) future.set_result(data_points.objects)
return await future return await future

View file

@ -4,7 +4,6 @@ import os
from pathlib import Path from pathlib import Path
from typing import List, Type from typing import List, Type
import aiofiles
import openai import openai
import instructor import instructor
from pydantic import BaseModel from pydantic import BaseModel
@ -13,9 +12,7 @@ from tenacity import retry, stop_after_attempt
from cognee.base_config import get_base_config from cognee.base_config import get_base_config
from cognee.infrastructure.llm.llm_interface import LLMInterface from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.shared.data_models import MonitoringTool # from cognee.shared.data_models import MonitoringTool
import logging
logging.basicConfig(level=logging.DEBUG)
class OpenAIAdapter(LLMInterface): class OpenAIAdapter(LLMInterface):
name = "OpenAI" name = "OpenAI"

View file

@ -100,8 +100,8 @@ def get_settings() -> SettingsDict:
}, },
vector_db = { vector_db = {
"provider": { "provider": {
"label": vector_config.vector_engine_provider, "label": vector_config.vector_db_provider,
"value": vector_config.vector_engine_provider.lower(), "value": vector_config.vector_db_provider.lower(),
}, },
"url": vector_config.vector_db_url, "url": vector_config.vector_db_url,
"api_key": vector_config.vector_db_key, "api_key": vector_config.vector_db_key,

View file

@ -12,4 +12,4 @@ async def save_vector_db_config(vector_db_config: VectorDBConfig):
vector_config.vector_db_url = vector_db_config.url vector_config.vector_db_url = vector_db_config.url
vector_config.vector_db_key = vector_db_config.api_key vector_config.vector_db_key = vector_db_config.api_key
vector_config.vector_engine_provider = vector_db_config.provider vector_config.vector_db_provider = vector_db_config.provider

View file

@ -25,7 +25,8 @@ class User(SQLAlchemyBaseUserTableUUID, Principal):
from fastapi_users import schemas from fastapi_users import schemas
class UserRead(schemas.BaseUser[uuid_UUID]): class UserRead(schemas.BaseUser[uuid_UUID]):
groups: list[uuid_UUID] # Add groups attribute # groups: list[uuid_UUID] # Add groups attribute
pass
class UserCreate(schemas.BaseUserCreate): class UserCreate(schemas.BaseUserCreate):
pass pass

View file

@ -14,7 +14,7 @@ async def chunk_update_check(data_chunks: list[DocumentChunk], collection_name:
[str(chunk.chunk_id) for chunk in data_chunks], [str(chunk.chunk_id) for chunk in data_chunks],
) )
existing_chunks_map = {chunk.id: chunk.payload for chunk in existing_chunks} existing_chunks_map = {str(chunk.id): chunk.payload for chunk in existing_chunks}
affected_data_chunks = [] affected_data_chunks = []

View file

@ -23,7 +23,7 @@ async def query_graph_connections(query: str, exploration_levels = 1) -> list[(s
exact_node = await graph_engine.extract_node(node_id) exact_node = await graph_engine.extract_node(node_id)
if exact_node is not None and "uuid" in exact_node: if exact_node is not None and "uuid" in exact_node:
node_connections = await graph_engine.get_connections(exact_node["uuid"]) node_connections = await graph_engine.get_connections(str(exact_node["uuid"]))
else: else:
vector_engine = get_vector_engine() vector_engine = get_vector_engine()
results = await asyncio.gather( results = await asyncio.gather(
@ -37,7 +37,7 @@ async def query_graph_connections(query: str, exploration_levels = 1) -> list[(s
return [] return []
node_connections_results = await asyncio.gather( node_connections_results = await asyncio.gather(
*[graph_engine.get_connections(result.payload["uuid"]) for result in relevant_results] *[graph_engine.get_connections(str(result.payload["uuid"])) for result in relevant_results]
) )
node_connections = [] node_connections = []

View file

@ -9,7 +9,7 @@ from cognee.api.v1.search import SearchType
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
async def main(): async def main():
cognee.config.set_vector_engine_provider("qdrant") cognee.config.set_vector_db_provider("qdrant")
data_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_qdrant")).resolve()) data_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_qdrant")).resolve())
cognee.config.data_root_directory(data_directory_path) cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_qdrant")).resolve()) cognee_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_qdrant")).resolve())

View file

@ -7,7 +7,7 @@ from cognee.api.v1.search import SearchType
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
async def main(): async def main():
cognee.config.set_vector_engine_provider("weaviate") cognee.config.set_vector_db_provider("weaviate")
data_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_weaviate")).resolve()) data_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_weaviate")).resolve())
cognee.config.data_root_directory(data_directory_path) cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_weaviate")).resolve()) cognee_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_weaviate")).resolve())

View file

@ -3,8 +3,19 @@
echo "Debug mode: $DEBUG" echo "Debug mode: $DEBUG"
echo "Environment: $ENVIRONMENT" echo "Environment: $ENVIRONMENT"
# Run migrations
poetry run alembic upgrade head # # Run Alembic migrations
# echo "Running database migrations..."
# poetry run alembic upgrade head
# # Check if the migrations were successful
# if [ $? -eq 0 ]; then
# echo "Migrations completed successfully."
# else
# echo "Migration failed, exiting."
# exit 1
# fi
echo "Starting Gunicorn" echo "Starting Gunicorn"

Binary file not shown.

14
examples/node/fetch.js Normal file
View file

@ -0,0 +1,14 @@
import nodeFetch from 'node-fetch';
import handleServerErrors from './handleServerErrors.js';
/**
 * Send a request to the local Cognee API and run the outcome through
 * `handleServerErrors`.
 *
 * @param {string} url - Path appended to `http://127.0.0.1:8000/api`.
 * @param {object} [options={}] - Options forwarded to `node-fetch`; any
 *   `options.headers` are preserved.
 * @param {string} [token] - Bearer token. When omitted, no `Authorization`
 *   header is added.
 * @returns {Promise<*>} Whatever `handleServerErrors` resolves with; rejects
 *   with whatever it rejects with.
 */
export default function fetch(url, options = {}, token) {
  const headers = { ...options.headers };

  // Unauthenticated calls (e.g. the login request) pass no token. Skip the
  // header then, instead of sending the literal string "Bearer undefined".
  if (token) {
    headers['Authorization'] = `Bearer ${token}`;
  }

  return nodeFetch('http://127.0.0.1:8000/api' + url, {
    ...options,
    headers,
  })
    .then(handleServerErrors)
    // Request-level failures and rejections from the handler above are both
    // passed through handleServerErrors, exactly as before.
    .catch(handleServerErrors);
}

View file

@ -0,0 +1,16 @@
/**
 * Turn an unsuccessful HTTP response into a rejected promise.
 *
 * - Status 401 rejects with `Error('Unauthorized')`.
 * - Any other value whose `ok` is falsy rejects with its parsed JSON body
 *   when it has a `json()` method, otherwise with `detail`, `body`, or the
 *   value itself (whichever is truthy first).
 * - Everything else resolves with the response unchanged.
 *
 * @param {object} response - A fetch-style response, or an error-like value.
 * @returns {Promise<object>} Resolves with `response` when `response.ok` is truthy.
 */
export default function handleServerErrors(response) {
  return new Promise((resolve, reject) => {
    if (response.status === 401) {
      return reject(new Error('Unauthorized'));
    }

    if (!response.ok) {
      if (typeof response.json === 'function') {
        return response.json().then(
          (error) => reject(error),
          // The error body was not valid JSON. Without this handler the outer
          // promise would never settle and the caller would wait forever.
          () => reject(new Error(`Request failed with status ${response.status}`)),
        );
      }

      return reject(response.detail || response.body || response);
    }

    return resolve(response);
  });
}

122
examples/node/main.js Normal file
View file

@ -0,0 +1,122 @@
import fs from 'fs';
import FormData from 'form-data';
import fetch from './fetch.js';
/**
 * POST `payload` as JSON to the given API path and return the raw response.
 */
function postJson(url, payload, token) {
  return fetch(url, {
    method: 'POST',
    body: JSON.stringify(payload),
    headers: {
      'Content-Type': 'application/json',
    },
  }, token);
}

/**
 * Run one `/v1/search` request of the given type and return the parsed results.
 */
async function search(searchType, query, token) {
  const response = await postJson('/v1/search', { searchType, query }, token);
  return response.json();
}

/**
 * End-to-end example: log in, list datasets, upload a file to the `main`
 * dataset, cognify it, fetch the graph, then run summary, chunk and insight
 * searches, logging each result.
 *
 * Any failure is logged and marks the process as failed.
 */
async function run() {
  try {
    // A default user is created automatically. To register a different user,
    // POST to '/v1/auth/register' with a JSON body such as:
    //   JSON.stringify({
    //     email: 'default_user@example.com',
    //     password: 'default_password',
    //     is_active: true,
    //     is_superuser: true,
    //     is_verified: true,
    //   })
    // and a 'Content-Type: application/json' header.

    const authCredentials = new FormData();
    authCredentials.append('username', 'default_user@example.com');
    authCredentials.append('password', 'default_password');

    const loginResponse = await fetch('/v1/auth/login', {
      method: 'POST',
      body: authCredentials,
    });
    const bearer = await loginResponse.json();
    const token = bearer.access_token;

    const response = await fetch('/v1/datasets', {}, token);
    const datasets = await response.json();
    console.log(datasets);

    const files = [
      fs.createReadStream('../data/artificial_intelligence.pdf'),
    ];

    const addData = new FormData();
    files.forEach((file) => {
      // No explicit filename is passed: the previous `file.name` was always
      // undefined because fs.ReadStream exposes `path`, not `name`.
      addData.append('data', file);
    });
    addData.append('datasetId', 'main');

    await fetch('/v1/add', {
      method: 'POST',
      body: addData,
      headers: addData.getHeaders(),
    }, token);

    await postJson('/v1/cognify', { datasets: ['main'] }, token);

    const graphResponse = await fetch('/v1/datasets/main/graph', {
      method: 'GET',
    }, token);
    const graphUrl = await graphResponse.text();
    console.log('Graph URL:', graphUrl);

    // Search for summaries
    const summariesResults = await search('SUMMARIES', 'Artificial Intelligence', token);
    console.log('Summaries Results:', summariesResults);

    // Search for chunks
    const chunksResults = await search('CHUNKS', 'Artificial Intelligence', token);
    console.log('Chunks Results:', chunksResults);

    // Search for insights
    const insightsResults = await search('INSIGHTS', 'Artificial Intelligence', token);
    console.log('Insights Results:', insightsResults);
  } catch (error) {
    console.error('Error:', error);
    // Report the failure through the exit status instead of exiting with 0.
    process.exitCode = 1;
  }
}
run();

156
examples/node/package-lock.json generated Normal file
View file

@ -0,0 +1,156 @@
{
"name": "node-example",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "node-example",
"version": "1.0.0",
"dependencies": {
"form-data": "^4.0.1",
"node-fetch": "^3.3.2"
}
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/data-uri-to-buffer": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
"integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
"engines": {
"node": ">= 12"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/fetch-blob": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "paypal",
"url": "https://paypal.me/jimmywarting"
}
],
"dependencies": {
"node-domexception": "^1.0.0",
"web-streams-polyfill": "^3.0.3"
},
"engines": {
"node": "^12.20 || >= 14.13"
}
},
"node_modules/form-data": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.1.tgz",
"integrity": "sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/formdata-polyfill": {
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
"dependencies": {
"fetch-blob": "^3.1.2"
},
"engines": {
"node": ">=12.20.0"
}
},
"node_modules/mime-db": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"dependencies": {
"mime-db": "1.52.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
"integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
"dependencies": {
"data-uri-to-buffer": "^4.0.0",
"fetch-blob": "^3.1.4",
"formdata-polyfill": "^4.0.10"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/node-fetch"
}
},
"node_modules/web-streams-polyfill": {
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
"integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
"engines": {
"node": ">= 8"
}
}
}
}

View file

@ -0,0 +1,14 @@
{
"type": "module",
"name": "node-example",
"version": "1.0.0",
"description": "Node example calling Cognee API",
"main": "main.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"dependencies": {
"form-data": "^4.0.1",
"node-fetch": "^3.3.2"
}
}

View file

@ -548,7 +548,7 @@
"#GRAPH_DATABASE_USERNAME=\"\"\n", "#GRAPH_DATABASE_USERNAME=\"\"\n",
"#GRAPH_DATABASE_PASSWORD=\"\"\n", "#GRAPH_DATABASE_PASSWORD=\"\"\n",
"\n", "\n",
"os.environ[\"VECTOR_ENGINE_PROVIDER\"]=\"lancedb\" # \"qdrant\", \"weaviate\" or \"lancedb\"\n", "os.environ[\"VECTOR_DB_PROVIDER\"]=\"lancedb\" # \"qdrant\", \"weaviate\" or \"lancedb\"\n",
"# Not needed if using \"lancedb\"\n", "# Not needed if using \"lancedb\"\n",
"# os.environ[\"VECTOR_DB_URL\"]=\"\"\n", "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
"# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n", "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",

38
poetry.lock generated
View file

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. # This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]] [[package]]
name = "aiofiles" name = "aiofiles"
@ -1350,13 +1350,13 @@ files = [
[[package]] [[package]]
name = "dlt" name = "dlt"
version = "1.0.0" version = "1.2.0"
description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run."
optional = false optional = false
python-versions = "<3.13,>=3.8.1" python-versions = "<3.13,>=3.8.1"
files = [ files = [
{file = "dlt-1.0.0-py3-none-any.whl", hash = "sha256:730474cadcbc8151854d2c2999099225df3fe2b03fcfe716bc42e0b1a7707484"}, {file = "dlt-1.2.0-py3-none-any.whl", hash = "sha256:85256c0f87fe3cc1eedc390e6e3a31820250ac1f75bb9510bcf4085d069427ce"},
{file = "dlt-1.0.0.tar.gz", hash = "sha256:757ca3b1fe19d47720f22ad45d0642077ccafe2e64094ef30da478ca50a392c4"}, {file = "dlt-1.2.0.tar.gz", hash = "sha256:3e3c8604ea2fb213f0901cecab018909570824e5addbb45954c2c274f1439b2c"},
] ]
[package.dependencies] [package.dependencies]
@ -1397,12 +1397,12 @@ clickhouse = ["adlfs (>=2022.4.0)", "clickhouse-connect (>=0.7.7)", "clickhouse-
databricks = ["databricks-sql-connector (>=2.9.3)"] databricks = ["databricks-sql-connector (>=2.9.3)"]
deltalake = ["deltalake (>=0.19.0)", "pyarrow (>=12.0.0)"] deltalake = ["deltalake (>=0.19.0)", "pyarrow (>=12.0.0)"]
dremio = ["pyarrow (>=12.0.0)"] dremio = ["pyarrow (>=12.0.0)"]
duckdb = ["duckdb (>=0.6.1,<0.11)"] duckdb = ["duckdb (>=0.9)"]
filesystem = ["botocore (>=1.28)", "s3fs (>=2022.4.0)"] filesystem = ["botocore (>=1.28)", "s3fs (>=2022.4.0)"]
gcp = ["gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (>=1.50.0)"] gcp = ["gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (>=1.50.0)"]
gs = ["gcsfs (>=2022.4.0)"] gs = ["gcsfs (>=2022.4.0)"]
lancedb = ["lancedb (>=0.8.2)", "pyarrow (>=12.0.0)", "tantivy (>=0.22.0)"] lancedb = ["lancedb (>=0.8.2)", "pyarrow (>=12.0.0)", "tantivy (>=0.22.0)"]
motherduck = ["duckdb (>=0.6.1,<0.11)", "pyarrow (>=12.0.0)"] motherduck = ["duckdb (>=0.9)", "pyarrow (>=12.0.0)"]
mssql = ["pyodbc (>=4.0.39)"] mssql = ["pyodbc (>=4.0.39)"]
parquet = ["pyarrow (>=12.0.0)"] parquet = ["pyarrow (>=12.0.0)"]
postgres = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0)"] postgres = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0)"]
@ -3685,6 +3685,7 @@ optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
files = [ files = [
{file = "mkdocs-redirects-1.2.1.tar.gz", hash = "sha256:9420066d70e2a6bb357adf86e67023dcdca1857f97f07c7fe450f8f1fb42f861"}, {file = "mkdocs-redirects-1.2.1.tar.gz", hash = "sha256:9420066d70e2a6bb357adf86e67023dcdca1857f97f07c7fe450f8f1fb42f861"},
{file = "mkdocs_redirects-1.2.1-py3-none-any.whl", hash = "sha256:497089f9e0219e7389304cffefccdfa1cac5ff9509f2cb706f4c9b221726dffb"},
] ]
[package.dependencies] [package.dependencies]
@ -4913,6 +4914,24 @@ files = [
[package.extras] [package.extras]
test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
[[package]]
name = "psycopg2"
version = "2.9.10"
description = "psycopg2 - Python-PostgreSQL Database Adapter"
optional = false
python-versions = ">=3.8"
files = [
{file = "psycopg2-2.9.10-cp310-cp310-win32.whl", hash = "sha256:5df2b672140f95adb453af93a7d669d7a7bf0a56bcd26f1502329166f4a61716"},
{file = "psycopg2-2.9.10-cp310-cp310-win_amd64.whl", hash = "sha256:c6f7b8561225f9e711a9c47087388a97fdc948211c10a4bccbf0ba68ab7b3b5a"},
{file = "psycopg2-2.9.10-cp311-cp311-win32.whl", hash = "sha256:47c4f9875125344f4c2b870e41b6aad585901318068acd01de93f3677a6522c2"},
{file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"},
{file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"},
{file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"},
{file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"},
{file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"},
{file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"},
]
[[package]] [[package]]
name = "ptyprocess" name = "ptyprocess"
version = "0.7.0" version = "0.7.0"
@ -6259,6 +6278,11 @@ files = [
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"},
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"},
{file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"},
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"},
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"},
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"},
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"},
{file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"},
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"},
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"},
{file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},
@ -7727,4 +7751,4 @@ weaviate = ["weaviate-client"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.9.0,<3.12" python-versions = ">=3.9.0,<3.12"
content-hash = "75d65fd1b99bf9db84fe026d140f6cb05b02afd31d4ad82a6286076256bd7609" content-hash = "4cba654100a455c8691dd3d4e1b588f00bbb2acca89168954037017b3a6aced9"

View file

@ -38,7 +38,7 @@ greenlet = "^3.0.3"
ruff = "^0.2.2" ruff = "^0.2.2"
filetype = "^1.2.0" filetype = "^1.2.0"
nltk = "^3.8.1" nltk = "^3.8.1"
dlt = {extras = ["sqlalchemy"], version = "^1.0.0"} dlt = {extras = ["sqlalchemy"], version = "^1.2.0"}
overrides = "^7.7.0" overrides = "^7.7.0"
aiofiles = "^23.2.1" aiofiles = "^23.2.1"
qdrant-client = "^1.9.0" qdrant-client = "^1.9.0"
@ -70,7 +70,7 @@ sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
fastapi-users = { version = "*", extras = ["sqlalchemy"] } fastapi-users = { version = "*", extras = ["sqlalchemy"] }
asyncpg = "^0.29.0" asyncpg = "^0.29.0"
alembic = "^1.13.3" alembic = "^1.13.3"
psycopg2 = "^2.9.10"
[tool.poetry.extras] [tool.poetry.extras]
@ -98,7 +98,6 @@ mkdocs-jupyter = "^0.24.6"
mkdocs-minify-plugin = "^0.8.0" mkdocs-minify-plugin = "^0.8.0"
mkdocs-redirects = "^1.2.1" mkdocs-redirects = "^1.2.1"
[tool.poetry.group.test-docs.dependencies] [tool.poetry.group.test-docs.dependencies]
fastapi = "^0.109.2" fastapi = "^0.109.2"
diskcache = "^5.6.3" diskcache = "^5.6.3"