diff --git a/api.py b/api.py
index c499702f6..1a7f4ff64 100644
--- a/api.py
+++ b/api.py
@@ -9,8 +9,8 @@ from fastapi import FastAPI, BackgroundTasks, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-from cognitive_architecture.database.postgres.database import AsyncSessionLocal
-from cognitive_architecture.database.postgres.database_crud import session_scope
+from cognitive_architecture.database.relationaldb.database import AsyncSessionLocal
+from cognitive_architecture.database.relationaldb.database_crud import session_scope
 from cognitive_architecture.vectorstore_manager import Memory
 from dotenv import load_dotenv
 from main import add_documents_to_graph_db, user_context_enrichment
diff --git a/assets/architecture.png b/assets/architecture.png
index 7d455a118..12d45cbea 100644
Binary files a/assets/architecture.png and b/assets/architecture.png differ
diff --git a/cognitive_architecture/config.py b/cognitive_architecture/config.py
index 22e0be9ae..d99e532e9 100644
--- a/cognitive_architecture/config.py
+++ b/cognitive_architecture/config.py
@@ -19,6 +19,8 @@ class Config:
     memgpt_dir: str = field(default_factory=lambda: os.getenv('COG_ARCH_DIR', 'cognitive_achitecture'))
     config_path: str = field(default_factory=lambda: os.path.join(os.getenv('COG_ARCH_DIR', 'cognitive_achitecture'), 'config'))
+    vectordb: str = 'lancedb'
+
     # Model parameters
     model: str = 'gpt-4-1106-preview'
     model_endpoint: str = 'openai'
diff --git a/cognitive_architecture/database/create_database.py b/cognitive_architecture/database/create_database.py
index ef0ddeacc..1bc4ac320 100644
--- a/cognitive_architecture/database/create_database.py
+++ b/cognitive_architecture/database/create_database.py
@@ -2,62 +2,141 @@ import os
 import logging
 import psycopg2
 from dotenv import load_dotenv
-from postgres.database import Base
+from relationaldb.database import Base
 from sqlalchemy import create_engine, text
-from postgres.models import memory
-from postgres.models import metadatas
-from postgres.models import operation
-from postgres.models import sessions
-from postgres.models import user
-from postgres.models import docs
+from relationaldb.models import memory
+from relationaldb.models import metadatas
+from relationaldb.models import operation
+from relationaldb.models import sessions
+from relationaldb.models import user
+from relationaldb.models import docs
+from relationaldb.database import DatabaseConfig  # Assuming DatabaseConfig is defined as before
+from sqlalchemy.exc import SQLAlchemyError
+from contextlib import contextmanager
 
 load_dotenv()
 logger = logging.getLogger(__name__)
 
-def create_admin_engine(username, password, host, database_name):
-    admin_url = f"postgresql://{username}:{password}@{host}:5432/{database_name}"
-    return create_engine(admin_url)
-
-def database_exists(connection, db_name):
-    query = text(f"SELECT 1 FROM pg_database WHERE datname='{db_name}'")
-    result = connection.execute(query).fetchone()
-    return result is not None
-
-def create_database(connection, db_name):
-    connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    cursor = connection.cursor()
-    cursor.execute(f"CREATE DATABASE {db_name}")
-    cursor.close()
-
-def drop_database(connection, db_name):
-    connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
-    cursor = connection.cursor()
-    cursor.execute(f"DROP DATABASE IF EXISTS {db_name}")
-    cursor.close()
-
-def create_tables(engine):
-    Base.metadata.create_all(bind = engine)
+class DatabaseManager:
+    def __init__(self, config: DatabaseConfig):
+        self.config = config
+        self.engine = create_engine(config.get_sqlalchemy_database_url())
+        self.db_type = config.db_type
+
+    @contextmanager
+    def get_connection(self):
+        if self.db_type in ['sqlite', 'duckdb']:
+            # For SQLite and DuckDB, the engine itself manages connections
+            yield self.engine
+        else:
+            connection = self.engine.connect()
+            try:
+                yield connection
+            finally:
+                connection.close()
+
+    def database_exists(self, db_name):
+        if self.db_type in ['sqlite', 'duckdb']:
+            # For SQLite and DuckDB, check if the database file exists
+            return os.path.exists(db_name)
+        else:
+            # Use a bound parameter instead of interpolating the name into the SQL
+            query = text("SELECT 1 FROM pg_database WHERE datname=:name")
+            with self.get_connection() as connection:
+                result = connection.execute(query, {"name": db_name}).fetchone()
+            return result is not None
+
+    def create_database(self, db_name):
+        if self.db_type not in ['sqlite', 'duckdb']:
+            # For databases like PostgreSQL, create the database explicitly
+            with self.get_connection() as connection:
+                connection.execution_options(isolation_level="AUTOCOMMIT")
+                connection.execute(text(f"CREATE DATABASE {db_name}"))
+
+    def drop_database(self, db_name):
+        if self.db_type in ['sqlite', 'duckdb']:
+            # For SQLite and DuckDB, simply remove the database file
+            os.remove(db_name)
+        else:
+            with self.get_connection() as connection:
+                connection.execution_options(isolation_level="AUTOCOMMIT")
+                connection.execute(text(f"DROP DATABASE IF EXISTS {db_name}"))
+
+    def create_tables(self):
+        Base.metadata.create_all(bind=self.engine)
 
 if __name__ == "__main__":
-    host = os.environ.get('POSTGRES_HOST')
-    username = os.environ.get('POSTGRES_USER')
-    password = os.environ.get('POSTGRES_PASSWORD')
-    database_name = os.environ.get('POSTGRES_DB')
+    # Example usage with SQLite
+    config = DatabaseConfig(db_type='sqlite', db_name='mydatabase.db')
 
-    engine = create_admin_engine(username, password, host, database_name)
-    connection = engine.connect()
+    # For DuckDB, you would set db_type to 'duckdb' and provide the database file name
+    # config = DatabaseConfig(db_type='duckdb', db_name='mydatabase.duckdb')
 
-    # print(Base.metadata.tables)
+    db_manager = DatabaseManager(config=config)
+
+    database_name = config.db_name
 
-    if not database_exists(connection, database_name):
+    if not db_manager.database_exists(database_name):
         logger.info(f"Database {database_name} does not exist. Creating...")
-        create_database(connection, database_name)
+        db_manager.create_database(database_name)
         logger.info(f"Database {database_name} created successfully.")
 
-    connection.close()
-    engine.dispose()
-
-    create_tables(engine)
\ No newline at end of file
+    db_manager.create_tables()
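Note: a minimal usage sketch of the new DatabaseManager, for reviewers. It assumes the module is imported the way the script itself imports its neighbours (relative to cognitive_architecture/database/, since create_database.py uses `from relationaldb.database import ...`); the file name is illustrative.

    from relationaldb.database import DatabaseConfig
    from create_database import DatabaseManager

    # File-backed engines need no explicit CREATE DATABASE; the file appears on first write.
    config = DatabaseConfig(db_type='sqlite', db_name='example.db')
    manager = DatabaseManager(config=config)

    if not manager.database_exists(config.db_name):
        manager.create_database(config.db_name)  # no-op for sqlite/duckdb
    manager.create_tables()  # emits CREATE TABLE for everything registered on Base.metadata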
Creating...") - create_database(connection, database_name) + db_manager.create_database(database_name) logger.info(f"Database {database_name} created successfully.") - connection.close() - engine.dispose() + db_manager.create_tables() - create_tables(engine) \ No newline at end of file +# +# def create_admin_engine(username, password, host, database_name): +# admin_url = f"postgresql://{username}:{password}@{host}:5432/{database_name}" +# return create_engine(admin_url) +# +# def database_exists(connection, db_name): +# query = text(f"SELECT 1 FROM pg_database WHERE datname='{db_name}'") +# result = connection.execute(query).fetchone() +# return result is not None +# +# def create_database(connection, db_name): +# connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) +# cursor = connection.cursor() +# cursor.execute(f"CREATE DATABASE {db_name}") +# cursor.close() +# +# def drop_database(connection, db_name): +# connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) +# cursor = connection.cursor() +# cursor.execute(f"DROP DATABASE IF EXISTS {db_name}") +# cursor.close() +# +# +# +# def create_tables(engine): +# Base.metadata.create_all(bind = engine) +# +# if __name__ == "__main__": +# host = os.environ.get('POSTGRES_HOST') +# username = os.environ.get('POSTGRES_USER') +# password = os.environ.get('POSTGRES_PASSWORD') +# database_name = os.environ.get('POSTGRES_DB') +# +# engine = create_admin_engine(username, password, host, database_name) +# connection = engine.connect() +# +# # print(Base.metadata.tables) +# +# if not database_exists(connection, database_name): +# logger.info(f"Database {database_name} does not exist. Creating...") +# create_database(connection, database_name) +# logger.info(f"Database {database_name} created successfully.") +# +# connection.close() +# engine.dispose() +# +# create_tables(engine) \ No newline at end of file diff --git a/cognitive_architecture/database/graph_database/__init__.py b/cognitive_architecture/database/graphdb/__init__.py similarity index 100% rename from cognitive_architecture/database/graph_database/__init__.py rename to cognitive_architecture/database/graphdb/__init__.py diff --git a/cognitive_architecture/database/graph_database/graph.py b/cognitive_architecture/database/graphdb/graph.py similarity index 95% rename from cognitive_architecture/database/graph_database/graph.py rename to cognitive_architecture/database/graphdb/graph.py index 60770484f..a2c56502c 100644 --- a/cognitive_architecture/database/graph_database/graph.py +++ b/cognitive_architecture/database/graphdb/graph.py @@ -29,7 +29,10 @@ from typing import List, Dict, Optional from ...utils import format_dict, append_uuid_to_variable_names, create_edge_variable_mapping, \ create_node_variable_mapping, get_unsumarized_vector_db_namespace from ...llm.queries import generate_summary, generate_graph - +import logging +from neo4j import AsyncGraphDatabase, Neo4jError +from contextlib import asynccontextmanager +from typing import Any, Dict, Optional, List DEFAULT_PRESET = "promethai_chat" preset_options = [DEFAULT_PRESET] PROMETHAI_DIR = os.path.join(os.path.expanduser("~"), ".") @@ -70,28 +73,50 @@ class AbstractGraphDB(ABC): class Neo4jGraphDB(AbstractGraphDB): - def __init__(self, url, username, password): - # self.graph = Neo4jGraph(url=url, username=username, password=password) - from neo4j import GraphDatabase - self.driver = GraphDatabase.driver(url, auth=(username, password)) - self.openai_key = config.openai_key + def 
__init__(self, url: str, username: str, password: str, driver: Optional[Any] = None): + self.driver = driver or AsyncGraphDatabase.driver(url, auth=(username, password)) + async def close(self) -> None: + await self.driver.close() + @asynccontextmanager + async def get_session(self) -> AsyncSession: + async with self.driver.session() as session: + yield session - def close(self): - # Method to close the Neo4j driver instance - self.driver.close() - - def query(self, query, params=None): + async def query(self, query: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]: try: - with self.driver.session() as session: - result = session.run(query, params).data() - return result - except Exception as e: - logging.error(f"An error occurred while executing the query: {e}") - raise e + async with self.get_session() as session: + result = await session.run(query, parameters=params) + return await result.data() + except Neo4jError as e: + logging.error(f"Neo4j query error: {e.message}") + raise +# class Neo4jGraphDB(AbstractGraphDB): +# def __init__(self, url, username, password): +# # self.graph = Neo4jGraph(url=url, username=username, password=password) +# from neo4j import GraphDatabase +# self.driver = GraphDatabase.driver(url, auth=(username, password)) +# self.openai_key = config.openai_key +# +# +# +# def close(self): +# # Method to close the Neo4j driver instance +# self.driver.close() +# +# def query(self, query, params=None): +# try: +# with self.driver.session() as session: +# result = session.run(query, params).data() +# return result +# except Exception as e: +# logging.error(f"An error occurred while executing the query: {e}") +# raise e +# + def create_base_cognitive_architecture(self, user_id: str): # Create the user and memory components if they don't exist diff --git a/cognitive_architecture/database/graph_database/networkx_graph.py b/cognitive_architecture/database/graphdb/networkx_graph.py similarity index 100% rename from cognitive_architecture/database/graph_database/networkx_graph.py rename to cognitive_architecture/database/graphdb/networkx_graph.py diff --git a/cognitive_architecture/database/postgres/database.py b/cognitive_architecture/database/postgres/database.py deleted file mode 100644 index eb344028e..000000000 --- a/cognitive_architecture/database/postgres/database.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession -from sqlalchemy.orm import declarative_base, sessionmaker -from contextlib import asynccontextmanager -from sqlalchemy.exc import OperationalError -import asyncio -import sys -from dotenv import load_dotenv - -load_dotenv() - - -# this is needed to import classes from other modules -# script_dir = os.path.dirname(os.path.abspath(__file__)) -# # Get the parent directory of your script and add it to sys.path -# parent_dir = os.path.dirname(script_dir) -# sys.path.append(parent_dir) -# from ...config import Config -# config = Config() -# config.load() - - -# in seconds -MAX_RETRIES = 3 -RETRY_DELAY = 5 - -import os - - -if os.environ.get('AWS_ENV') == 'prd' or os.environ.get('AWS_ENV') == 'dev': - host = os.environ.get('POSTGRES_HOST') - username = os.environ.get('POSTGRES_USER') - password = os.environ.get('POSTGRES_PASSWORD') - database_name = os.environ.get('POSTGRES_DB') -elif os.environ.get('AWS_ENV') == 'local': - host = os.environ.get('POSTGRES_HOST') - username = os.environ.get('POSTGRES_USER') - password = os.environ.get('POSTGRES_PASSWORD') - database_name = 
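Note: the Neo4j layer is now async end to end, so callers must await query() and close(). A sketch of the intended call pattern (URL, credentials, and the Cypher are placeholders):

    import asyncio
    from cognitive_architecture.database.graphdb.graph import Neo4jGraphDB

    async def main():
        graph = Neo4jGraphDB("bolt://localhost:7687", "neo4j", "password")
        try:
            # query() returns a list of dicts, one per result record
            rows = await graph.query(
                "MATCH (u:User {userId: $user_id}) RETURN u.userId AS id",
                params={"user_id": "user_123"},
            )
            print(rows)
        finally:
            await graph.close()

    asyncio.run(main())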
diff --git a/cognitive_architecture/database/postgres/database.py b/cognitive_architecture/database/postgres/database.py
deleted file mode 100644
index eb344028e..000000000
--- a/cognitive_architecture/database/postgres/database.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import os
-from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
-from sqlalchemy.orm import declarative_base, sessionmaker
-from contextlib import asynccontextmanager
-from sqlalchemy.exc import OperationalError
-import asyncio
-import sys
-from dotenv import load_dotenv
-
-load_dotenv()
-
-
-# this is needed to import classes from other modules
-# script_dir = os.path.dirname(os.path.abspath(__file__))
-# # Get the parent directory of your script and add it to sys.path
-# parent_dir = os.path.dirname(script_dir)
-# sys.path.append(parent_dir)
-# from ...config import Config
-# config = Config()
-# config.load()
-
-
-# in seconds
-MAX_RETRIES = 3
-RETRY_DELAY = 5
-
-import os
-
-
-if os.environ.get('AWS_ENV') == 'prd' or os.environ.get('AWS_ENV') == 'dev':
-    host = os.environ.get('POSTGRES_HOST')
-    username = os.environ.get('POSTGRES_USER')
-    password = os.environ.get('POSTGRES_PASSWORD')
-    database_name = os.environ.get('POSTGRES_DB')
-elif os.environ.get('AWS_ENV') == 'local':
-    host = os.environ.get('POSTGRES_HOST')
-    username = os.environ.get('POSTGRES_USER')
-    password = os.environ.get('POSTGRES_PASSWORD')
-    database_name = os.environ.get('POSTGRES_DB')
-else:
-    host = os.environ.get('POSTGRES_HOST')
-    username = os.environ.get('POSTGRES_USER')
-    password = os.environ.get('POSTGRES_PASSWORD')
-    database_name = os.environ.get('POSTGRES_DB')
-
-# host = config.postgres_host
-# username = config.postgres_user
-# password = config.postgres_password
-# database_name = config.postgres_db
-
-
-
-
-
-# Use the asyncpg driver for async operation
-SQLALCHEMY_DATABASE_URL = f"postgresql+asyncpg://{username}:{password}@{host}:5432/{database_name}"
-
-
-engine = create_async_engine(
-    SQLALCHEMY_DATABASE_URL,
-    pool_recycle=3600,
-    echo=True  # Enable logging for tutorial purposes
-)
-# Use AsyncSession for the session
-AsyncSessionLocal = sessionmaker(
-    bind=engine,
-    class_=AsyncSession,
-    expire_on_commit=False,
-)
-
-Base = declarative_base()
-
-# Use asynccontextmanager to define an async context manager
-@asynccontextmanager
-async def get_db():
-    db = AsyncSessionLocal()
-    try:
-        yield db
-    finally:
-        await db.close()
-
-# Use async/await syntax for the async function
-async def safe_db_operation(db_op, *args, **kwargs):
-    for attempt in range(MAX_RETRIES):
-        async with get_db() as db:
-            try:
-                # Ensure your db_op is also async
-                return await db_op(db, *args, **kwargs)
-            except OperationalError as e:
-                await db.rollback()
-                if "server closed the connection unexpectedly" in str(e) and attempt < MAX_RETRIES - 1:
-                    await asyncio.sleep(RETRY_DELAY)
-                else:
-                    raise
\ No newline at end of file
diff --git a/cognitive_architecture/database/postgres/__init__.py b/cognitive_architecture/database/relationaldb/__init__.py
similarity index 100%
rename from cognitive_architecture/database/postgres/__init__.py
rename to cognitive_architecture/database/relationaldb/__init__.py
diff --git a/cognitive_architecture/database/relationaldb/database.py b/cognitive_architecture/database/relationaldb/database.py
new file mode 100644
index 000000000..f0f0eb6a4
--- /dev/null
+++ b/cognitive_architecture/database/relationaldb/database.py
@@ -0,0 +1,130 @@
+import json
+import os
+from pathlib import Path
+
+from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
+from sqlalchemy.orm import declarative_base, sessionmaker
+from contextlib import asynccontextmanager
+from sqlalchemy.exc import OperationalError
+import asyncio
+import sys
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+# this is needed to import classes from other modules
+# script_dir = os.path.dirname(os.path.abspath(__file__))
+# # Get the parent directory of your script and add it to sys.path
+# parent_dir = os.path.dirname(script_dir)
+# sys.path.append(parent_dir)
+# from ...config import Config
+# config = Config()
+# config.load()
+
+
+# in seconds
+MAX_RETRIES = 3
+RETRY_DELAY = 5
+
+
+class DatabaseConfig:
+    def __init__(self, db_type=None, db_name=None, host=None, user=None, password=None, port=None, config_file=None):
+        if config_file:
+            self.load_from_file(config_file)
+        else:
+            # Load default values from environment variables or use provided values
+            self.db_type = db_type or os.getenv('DB_TYPE', 'sqlite')
+            self.db_name = db_name or os.getenv('DB_NAME', 'database.db')
+            self.host = host or os.getenv('DB_HOST', 'localhost')
+            self.user = user or os.getenv('DB_USER', 'user')
+            self.password = password or os.getenv('DB_PASSWORD', 'password')
+            self.port = port or os.getenv('DB_PORT', '5432')
+
+    def load_from_file(self, file_path):
+        with open(file_path, 'r') as file:
+            config = json.load(file)
+            self.db_type = config.get('db_type', 'sqlite')
+            self.db_name = config.get('db_name', 'database.db')
+            self.host = config.get('host', 'localhost')
+            self.user = config.get('user', 'user')
+            self.password = config.get('password', 'password')
+            self.port = config.get('port', '5432')
+
+    def get_sqlalchemy_database_url(self):
+        if self.db_type == 'sqlite':
+            db_path = Path(self.db_name).absolute()  # Ensure the path is absolute
+            return f"sqlite+aiosqlite:///{db_path}"  # SQLite uses file path
+        elif self.db_type == 'duckdb':
+            db_path = Path(self.db_name).absolute()  # Ensure the path is absolute for DuckDB as well
+            # NOTE: duckdb-engine is synchronous; this URL works with create_engine,
+            # not with the async engine below.
+            return f"duckdb:///{db_path}"
+        elif self.db_type == 'postgresql':
+            # Ensure optional parameters are handled gracefully
+            port_str = f":{self.port}" if self.port else ""
+            password_str = f":{self.password}" if self.password else ""
+            return f"postgresql+asyncpg://{self.user}{password_str}@{self.host}{port_str}/{self.db_name}"
+        else:
+            raise ValueError(f"Unsupported DB_TYPE: {self.db_type}")
+
+# Example usage with a configuration file:
+# config = DatabaseConfig(config_file='path/to/config.json')
+# Or set the values programmatically:
+# config = DatabaseConfig(db_type='postgresql', db_name='mydatabase', user='myuser', password='mypassword', host='myhost', port='5432')
+config = DatabaseConfig()  # defaults come from the DB_* environment variables
+
+SQLALCHEMY_DATABASE_URL = config.get_sqlalchemy_database_url()
+
+
+engine = create_async_engine(
+    SQLALCHEMY_DATABASE_URL,
+    pool_recycle=3600,
+    echo=True  # Enable logging for tutorial purposes
+)
+# Use AsyncSession for the session
+AsyncSessionLocal = sessionmaker(
+    bind=engine,
+    class_=AsyncSession,
+    expire_on_commit=False,
+)
+
+Base = declarative_base()
+
+# Use asynccontextmanager to define an async context manager
+@asynccontextmanager
+async def get_db():
+    db = AsyncSessionLocal()
+    try:
+        yield db
+    finally:
+        await db.close()
diff --git a/cognitive_architecture/database/postgres/database_crud.py b/cognitive_architecture/database/relationaldb/database_crud.py
similarity index 100%
rename from cognitive_architecture/database/postgres/database_crud.py
rename to cognitive_architecture/database/relationaldb/database_crud.py
diff --git a/cognitive_architecture/database/postgres/models/__init__.py b/cognitive_architecture/database/relationaldb/models/__init__.py
similarity index 100%
rename from cognitive_architecture/database/postgres/models/__init__.py
rename to cognitive_architecture/database/relationaldb/models/__init__.py
diff --git a/cognitive_architecture/database/postgres/models/docs.py b/cognitive_architecture/database/relationaldb/models/docs.py
similarity index 100%
rename from cognitive_architecture/database/postgres/models/docs.py
rename to cognitive_architecture/database/relationaldb/models/docs.py
diff --git a/cognitive_architecture/database/postgres/models/memory.py b/cognitive_architecture/database/relationaldb/models/memory.py
similarity index 100%
rename from cognitive_architecture/database/postgres/models/memory.py
rename to cognitive_architecture/database/relationaldb/models/memory.py
diff --git a/cognitive_architecture/database/postgres/models/metadatas.py b/cognitive_architecture/database/relationaldb/models/metadatas.py
similarity index 100%
rename from cognitive_architecture/database/postgres/models/metadatas.py
rename to cognitive_architecture/database/relationaldb/models/metadatas.py
diff --git a/cognitive_architecture/database/postgres/models/operation.py b/cognitive_architecture/database/relationaldb/models/operation.py
similarity index 100%
rename from cognitive_architecture/database/postgres/models/operation.py
rename to cognitive_architecture/database/relationaldb/models/operation.py
diff --git a/cognitive_architecture/database/postgres/models/sessions.py b/cognitive_architecture/database/relationaldb/models/sessions.py
similarity index 100%
rename from cognitive_architecture/database/postgres/models/sessions.py
rename to cognitive_architecture/database/relationaldb/models/sessions.py
diff --git a/cognitive_architecture/database/postgres/models/user.py b/cognitive_architecture/database/relationaldb/models/user.py
similarity index 100%
rename from cognitive_architecture/database/postgres/models/user.py
rename to cognitive_architecture/database/relationaldb/models/user.py
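Note: for quick reference, the URLs DatabaseConfig.get_sqlalchemy_database_url() produces per backend (paths and credentials below are illustrative):

    from cognitive_architecture.database.relationaldb.database import DatabaseConfig

    print(DatabaseConfig(db_type='sqlite', db_name='memory.db').get_sqlalchemy_database_url())
    # -> sqlite+aiosqlite:////absolute/path/to/memory.db

    pg = DatabaseConfig(db_type='postgresql', db_name='cognee', user='app',
                        password='secret', host='db.internal', port='5432')
    print(pg.get_sqlalchemy_database_url())
    # -> postgresql+asyncpg://app:secret@db.internal:5432/cognee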
os.getenv("OPENAI_API_KEY", "") @@ -45,8 +46,9 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") class VectorDBFactory: def __init__(self): self.db_map = { - "pinecone": PineconeVectorDB, - "weaviate": WeaviateVectorDB, + VectorDBType.PINECONE: PineconeVectorDB, + VectorDBType.WEAVIATE: WeaviateVectorDB, + VectorDBType.LANCEDB: LanceDB, # Add more database types and their corresponding classes here } @@ -55,7 +57,7 @@ class VectorDBFactory: user_id: str, index_name: str, memory_id: str, - db_type: str = "weaviate", + db_type: str, namespace: str = None, embeddings=None, ): @@ -100,58 +102,6 @@ class BaseMemory: return self.vector_db.init_client(embeddings, namespace) -# class VectorDBFactory: -# def create_vector_db( -# self, -# user_id: str, -# index_name: str, -# memory_id: str, -# db_type: str = "pinecone", -# namespace: str = None, -# embeddings = None, -# ): -# db_map = {"pinecone": PineconeVectorDB, "weaviate": WeaviateVectorDB} -# -# if db_type in db_map: -# return db_map[db_type]( -# user_id, -# index_name, -# memory_id, -# namespace, -# embeddings -# ) -# -# raise ValueError(f"Unsupported database type: {db_type}") -# -# class BaseMemory: -# def __init__( -# self, -# user_id: str, -# memory_id: Optional[str], -# index_name: Optional[str], -# db_type: str, -# namespace: str, -# embeddings: Optional[None], -# ): -# self.user_id = user_id -# self.memory_id = memory_id -# self.index_name = index_name -# self.namespace = namespace -# self.embeddings = embeddings -# self.db_type = db_type -# factory = VectorDBFactory() -# self.vector_db = factory.create_vector_db( -# self.user_id, -# self.index_name, -# self.memory_id, -# db_type=self.db_type, -# namespace=self.namespace, -# embeddings=self.embeddings -# ) -# -# def init_client(self, embeddings, namespace: str): -# -# return self.vector_db.init_weaviate_client(embeddings, namespace) def create_field(self, field_type, **kwargs): field_mapping = { diff --git a/cognitive_architecture/database/vectordb/vector_db_type.py b/cognitive_architecture/database/vectordb/vector_db_type.py index b7983e8bc..83cd3acf5 100644 --- a/cognitive_architecture/database/vectordb/vector_db_type.py +++ b/cognitive_architecture/database/vectordb/vector_db_type.py @@ -10,4 +10,5 @@ class VectorDBType(Enum): PGVECTOR = 'pgvector' REDIS = 'redis' LANCEDB = 'lancedb' - MONGODB = 'mongodb' \ No newline at end of file + MONGODB = 'mongodb' + FAISS = 'faiss' diff --git a/cognitive_architecture/database/vectordb/vectordb.py b/cognitive_architecture/database/vectordb/vectordb.py index f0581a356..e16788397 100644 --- a/cognitive_architecture/database/vectordb/vectordb.py +++ b/cognitive_architecture/database/vectordb/vectordb.py @@ -415,3 +415,41 @@ class WeaviateVectorDB(VectorDB): consistency_level=weaviate.data.replication.ConsistencyLevel.ALL, # default QUORUM ) return + +import os +import lancedb +from pydantic import BaseModel +from typing import List, Optional +import pandas as pd +import pyarrow as pa +class LanceDB(VectorDB): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.db = self.init_lancedb() + + def init_lancedb(self): + # Initialize LanceDB connection + # Adjust the URI as needed for your LanceDB setup + uri = "s3://my-bucket/lancedb" if self.namespace else "~/.lancedb" + db = lancedb.connect(uri, api_key=os.getenv("LANCEDB_API_KEY")) + return db + + def create_table(self, name: str, schema: Optional[pa.Schema] = None, data: Optional[pd.DataFrame] = None): + # Create a table in LanceDB. 
If schema is not provided, it will be inferred from the data. + if data is not None and schema is None: + schema = pa.Schema.from_pandas(data) + table = self.db.create_table(name, schema=schema) + if data is not None: + table.add(data.to_dict('records')) + return table + + def add_memories(self, table_name: str, data: pd.DataFrame): + # Add data to an existing table in LanceDB + table = self.db.open_table(table_name) + table.add(data.to_dict('records')) + + def fetch_memories(self, table_name: str, query_vector: List[float], top_k: int = 10): + # Perform a vector search in the specified table + table = self.db.open_table(table_name) + results = table.search(query_vector).limit(top_k).to_pandas() + return results diff --git a/cognitive_architecture/utils.py b/cognitive_architecture/utils.py index 8b33245d5..50b1ff037 100644 --- a/cognitive_architecture/utils.py +++ b/cognitive_architecture/utils.py @@ -7,7 +7,7 @@ from graphviz import Digraph from sqlalchemy import or_ from sqlalchemy.orm import contains_eager -from cognitive_architecture.database.postgres.database import AsyncSessionLocal +from cognitive_architecture.database.relationaldb.database import AsyncSessionLocal from dotenv import load_dotenv load_dotenv() @@ -133,12 +133,12 @@ def generate_letter_uuid(length=8): letters = string.ascii_uppercase # A-Z return "".join(random.choice(letters) for _ in range(length)) -from cognitive_architecture.database.postgres.models.operation import Operation -from cognitive_architecture.database.postgres.database_crud import session_scope, add_entity, update_entity, fetch_job_id -from cognitive_architecture.database.postgres.models.metadatas import MetaDatas -from cognitive_architecture.database.postgres.models.docs import DocsModel -from cognitive_architecture.database.postgres.models.memory import MemoryModel -from cognitive_architecture.database.postgres.models.user import User +from cognitive_architecture.database.relationaldb.models.operation import Operation +from cognitive_architecture.database.relationaldb.database_crud import session_scope, add_entity, update_entity, fetch_job_id +from cognitive_architecture.database.relationaldb.models.metadatas import MetaDatas +from cognitive_architecture.database.relationaldb.models.docs import DocsModel +from cognitive_architecture.database.relationaldb.models.memory import MemoryModel +from cognitive_architecture.database.relationaldb.models.user import User from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select import logging diff --git a/cognitive_architecture/vectorstore_manager.py b/cognitive_architecture/vectorstore_manager.py index 46dfd5e77..b30624d8e 100644 --- a/cognitive_architecture/vectorstore_manager.py +++ b/cognitive_architecture/vectorstore_manager.py @@ -11,12 +11,12 @@ print(os.getcwd()) -from cognitive_architecture.database.postgres.models.user import User -from cognitive_architecture.database.postgres.models.memory import MemoryModel +from cognitive_architecture.database.relationaldb.models.user import User +from cognitive_architecture.database.relationaldb.models.memory import MemoryModel import ast import tracemalloc -from cognitive_architecture.database.postgres.database_crud import add_entity +from cognitive_architecture.database.relationaldb.database_crud import add_entity tracemalloc.start() @@ -142,7 +142,7 @@ class Memory: user_id: str = "676", session=None, index_name: str = None, - db_type: str = "weaviate", + db_type: str = None, namespace: str = None, memory_id: str = None, memory_class 
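Note: a sketch of the new LanceDB adapter driven directly, outside the factory. The positional constructor arguments mirror the order the factory passes (user_id, index_name, memory_id, namespace, embeddings — the base VectorDB signature is not shown in this diff, so treat them as assumptions); the table name and vectors are made up.

    import pandas as pd

    from cognitive_architecture.database.vectordb.vectordb import LanceDB

    # LanceDB infers the Arrow schema from the DataFrame; vectors are list columns.
    df = pd.DataFrame({
        "vector": [[0.1, 0.2], [0.3, 0.4]],
        "text": ["first memory", "second memory"],
    })

    store = LanceDB("user_123", "memories", "m1", None, None)
    store.create_table("memories", data=df)
    hits = store.fetch_memories("memories", query_vector=[0.1, 0.2], top_k=2)
    print(hits)  # DataFrame of nearest rows, including a _distance column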
diff --git a/cognitive_architecture/utils.py b/cognitive_architecture/utils.py
index 8b33245d5..50b1ff037 100644
--- a/cognitive_architecture/utils.py
+++ b/cognitive_architecture/utils.py
@@ -7,7 +7,7 @@ from graphviz import Digraph
 from sqlalchemy import or_
 from sqlalchemy.orm import contains_eager
 
-from cognitive_architecture.database.postgres.database import AsyncSessionLocal
+from cognitive_architecture.database.relationaldb.database import AsyncSessionLocal
 from dotenv import load_dotenv
 load_dotenv()
@@ -133,12 +133,12 @@ def generate_letter_uuid(length=8):
     letters = string.ascii_uppercase  # A-Z
     return "".join(random.choice(letters) for _ in range(length))
 
-from cognitive_architecture.database.postgres.models.operation import Operation
-from cognitive_architecture.database.postgres.database_crud import session_scope, add_entity, update_entity, fetch_job_id
-from cognitive_architecture.database.postgres.models.metadatas import MetaDatas
-from cognitive_architecture.database.postgres.models.docs import DocsModel
-from cognitive_architecture.database.postgres.models.memory import MemoryModel
-from cognitive_architecture.database.postgres.models.user import User
+from cognitive_architecture.database.relationaldb.models.operation import Operation
+from cognitive_architecture.database.relationaldb.database_crud import session_scope, add_entity, update_entity, fetch_job_id
+from cognitive_architecture.database.relationaldb.models.metadatas import MetaDatas
+from cognitive_architecture.database.relationaldb.models.docs import DocsModel
+from cognitive_architecture.database.relationaldb.models.memory import MemoryModel
+from cognitive_architecture.database.relationaldb.models.user import User
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select
 import logging
diff --git a/cognitive_architecture/vectorstore_manager.py b/cognitive_architecture/vectorstore_manager.py
index 46dfd5e77..b30624d8e 100644
--- a/cognitive_architecture/vectorstore_manager.py
+++ b/cognitive_architecture/vectorstore_manager.py
@@ -11,12 +11,12 @@
 
 print(os.getcwd())
 
-from cognitive_architecture.database.postgres.models.user import User
-from cognitive_architecture.database.postgres.models.memory import MemoryModel
+from cognitive_architecture.database.relationaldb.models.user import User
+from cognitive_architecture.database.relationaldb.models.memory import MemoryModel
 import ast
 import tracemalloc
 
-from cognitive_architecture.database.postgres.database_crud import add_entity
+from cognitive_architecture.database.relationaldb.database_crud import add_entity
 
 tracemalloc.start()
@@ -142,7 +142,7 @@ class Memory:
         user_id: str = "676",
         session=None,
         index_name: str = None,
-        db_type: str = "weaviate",
+        db_type: str = None,
         namespace: str = None,
         memory_id: str = None,
         memory_class = None,
@@ -196,7 +196,7 @@ class Memory:
         )
 
         memory_class = DynamicBaseMemory(
-            memory_label, user_id, str(memory_id), index_name=memory_label , db_type='weaviate', **kwargs
+            memory_label, user_id, str(memory_id), index_name=memory_label, db_type=config.vectordb, **kwargs
         )
 
         return cls(user_id=user_id, session=session, memory_id=memory_id, job_id =job_id, memory_class=memory_class, **kwargs)
@@ -442,8 +442,8 @@ async def main():
     # memory_instance = Memory(namespace='SEMANTICMEMORY')
     # sss = await memory_instance.dynamic_method_call(memory_instance.semantic_memory_class, 'fetch_memories', observation='some_observation')
 
-    from database.postgres.database_crud import session_scope
-    from database.postgres.database import AsyncSessionLocal
+    from database.relationaldb.database_crud import session_scope
+    from database.relationaldb.database import AsyncSessionLocal
 
     async with session_scope(AsyncSessionLocal()) as session:
         memory = await Memory.create_memory("677", session, "SEMANTICMEMORY", namespace="SEMANTICMEMORY")
diff --git a/main.py b/main.py
index aa3366d5d..9d168a82d 100644
--- a/main.py
+++ b/main.py
@@ -2,26 +2,26 @@ from typing import Optional, List
 
 from neo4j.exceptions import Neo4jError
 from pydantic import BaseModel, Field
-from cognitive_architecture.database.graph_database.graph import Neo4jGraphDB
-from cognitive_architecture.database.postgres.models.memory import MemoryModel
+from cognitive_architecture.database.graphdb.graph import Neo4jGraphDB
+from cognitive_architecture.database.relationaldb.models.memory import MemoryModel
 from cognitive_architecture.classifiers.classifier import classify_documents
 import os
 from dotenv import load_dotenv
-from cognitive_architecture.database.postgres.database_crud import session_scope, update_entity_graph_summary
-from cognitive_architecture.database.postgres.database import AsyncSessionLocal
+from cognitive_architecture.database.relationaldb.database_crud import session_scope, update_entity_graph_summary
+from cognitive_architecture.database.relationaldb.database import AsyncSessionLocal
 from cognitive_architecture.utils import generate_letter_uuid
 import instructor
 from openai import OpenAI
 from cognitive_architecture.vectorstore_manager import Memory
-from cognitive_architecture.database.postgres.database_crud import fetch_job_id
+from cognitive_architecture.database.relationaldb.database_crud import fetch_job_id
 import uuid
-from cognitive_architecture.database.postgres.models.sessions import Session
-from cognitive_architecture.database.postgres.models.operation import Operation
-from cognitive_architecture.database.postgres.database_crud import session_scope, add_entity, update_entity, fetch_job_id
-from cognitive_architecture.database.postgres.models.metadatas import MetaDatas
-from cognitive_architecture.database.postgres.models.docs import DocsModel
-from cognitive_architecture.database.postgres.models.memory import MemoryModel
-from cognitive_architecture.database.postgres.models.user import User
+from cognitive_architecture.database.relationaldb.models.sessions import Session
+from cognitive_architecture.database.relationaldb.models.operation import Operation
+from cognitive_architecture.database.relationaldb.database_crud import session_scope, add_entity, update_entity, fetch_job_id
+from cognitive_architecture.database.relationaldb.models.metadatas import MetaDatas
+from cognitive_architecture.database.relationaldb.models.docs import DocsModel
+from cognitive_architecture.database.relationaldb.models.memory import MemoryModel
+from cognitive_architecture.database.relationaldb.models.user import User
 from cognitive_architecture.classifiers.classifier import classify_call
 
 aclient = instructor.patch(OpenAI())
 
 DEFAULT_PRESET = "promethai_chat"
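Note: with db_type now defaulting to None, DynamicBaseMemory reads the backend from config.vectordb ('lancedb' per the new Config field), so switching vector stores becomes a configuration change rather than a code change. A sketch mirroring the module's own main() (the user id and labels are placeholders):

    from cognitive_architecture.database.relationaldb.database import AsyncSessionLocal
    from cognitive_architecture.database.relationaldb.database_crud import session_scope
    from cognitive_architecture.vectorstore_manager import Memory

    async def build_memory():
        async with session_scope(AsyncSessionLocal()) as session:
            # create_memory() wires up DynamicBaseMemory with db_type=config.vectordb
            return await Memory.create_memory(
                "user_123", session, "SEMANTICMEMORY", namespace="SEMANTICMEMORY"
            )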
diff --git a/poetry.lock b/poetry.lock
index be9d0dc48..4e22f1348 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -954,6 +954,48 @@ files = [
 marshmallow = ">=3.18.0,<4.0.0"
 typing-inspect = ">=0.4.0,<1"
 
+[[package]]
+name = "debugpy"
+version = "1.8.1"
+description = "An implementation of the Debug Adapter Protocol for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "debugpy-1.8.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3bda0f1e943d386cc7a0e71bfa59f4137909e2ed947fb3946c506e113000f741"},
+    {file = "debugpy-1.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dda73bf69ea479c8577a0448f8c707691152e6c4de7f0c4dec5a4bc11dee516e"},
+    {file = "debugpy-1.8.1-cp310-cp310-win32.whl", hash = "sha256:3a79c6f62adef994b2dbe9fc2cc9cc3864a23575b6e387339ab739873bea53d0"},
+    {file = "debugpy-1.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:7eb7bd2b56ea3bedb009616d9e2f64aab8fc7000d481faec3cd26c98a964bcdd"},
+    {file = "debugpy-1.8.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:016a9fcfc2c6b57f939673c874310d8581d51a0fe0858e7fac4e240c5eb743cb"},
+    {file = "debugpy-1.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd97ed11a4c7f6d042d320ce03d83b20c3fb40da892f994bc041bbc415d7a099"},
+    {file = "debugpy-1.8.1-cp311-cp311-win32.whl", hash = "sha256:0de56aba8249c28a300bdb0672a9b94785074eb82eb672db66c8144fff673146"},
+    {file = "debugpy-1.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:1a9fe0829c2b854757b4fd0a338d93bc17249a3bf69ecf765c61d4c522bb92a8"},
+    {file = "debugpy-1.8.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3ebb70ba1a6524d19fa7bb122f44b74170c447d5746a503e36adc244a20ac539"},
+    {file = "debugpy-1.8.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2e658a9630f27534e63922ebf655a6ab60c370f4d2fc5c02a5b19baf4410ace"},
+    {file = "debugpy-1.8.1-cp312-cp312-win32.whl", hash = "sha256:caad2846e21188797a1f17fc09c31b84c7c3c23baf2516fed5b40b378515bbf0"},
+    {file = "debugpy-1.8.1-cp312-cp312-win_amd64.whl", hash = "sha256:edcc9f58ec0fd121a25bc950d4578df47428d72e1a0d66c07403b04eb93bcf98"},
+    {file = "debugpy-1.8.1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:7a3afa222f6fd3d9dfecd52729bc2e12c93e22a7491405a0ecbf9e1d32d45b39"},
+    {file = "debugpy-1.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d915a18f0597ef685e88bb35e5d7ab968964b7befefe1aaea1eb5b2640b586c7"},
+    {file = "debugpy-1.8.1-cp38-cp38-win32.whl", hash = "sha256:92116039b5500633cc8d44ecc187abe2dfa9b90f7a82bbf81d079fcdd506bae9"},
+    {file = "debugpy-1.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:e38beb7992b5afd9d5244e96ad5fa9135e94993b0c551ceebf3fe1a5d9beb234"},
+    {file = "debugpy-1.8.1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:bfb20cb57486c8e4793d41996652e5a6a885b4d9175dd369045dad59eaacea42"},
+    {file = "debugpy-1.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efd3fdd3f67a7e576dd869c184c5dd71d9aaa36ded271939da352880c012e703"},
+    {file = "debugpy-1.8.1-cp39-cp39-win32.whl", hash = "sha256:58911e8521ca0c785ac7a0539f1e77e0ce2df753f786188f382229278b4cdf23"},
+    {file = "debugpy-1.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:6df9aa9599eb05ca179fb0b810282255202a66835c6efb1d112d21ecb830ddd3"},
+    {file = "debugpy-1.8.1-py2.py3-none-any.whl", hash = "sha256:28acbe2241222b87e255260c76741e1fbf04fdc3b6d094fcf57b6c6f75ce1242"},
+    {file = "debugpy-1.8.1.zip", hash = "sha256:f696d6be15be87aef621917585f9bb94b1dc9e8aced570db1b8a6fc14e8f9b42"},
+]
+
+[[package]]
+name = "decorator"
+version = "5.1.1"
+description = "Decorators for Humans"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
+    {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
+]
+
 [[package]]
 name = "deprecated"
 version = "1.2.14"
@@ -971,6 +1013,20 @@ wrapt = ">=1.10,<2"
 
 [package.extras]
 dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"]
 
+[[package]]
+name = "deprecation"
+version = "2.1.0"
+description = "A library to handle automated deprecations"
+optional = false
+python-versions = "*"
+files = [
+    {file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"},
+    {file = "deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff"},
+]
+
+[package.dependencies]
+packaging = "*"
+
 [[package]]
 name = "distro"
 version = "1.9.0"
@@ -2183,6 +2239,39 @@ files = [
     {file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"},
 ]
 
+[[package]]
+name = "lancedb"
+version = "0.5.5"
+description = "lancedb"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "lancedb-0.5.5-py3-none-any.whl", hash = "sha256:98810855f279469dc6f1626ba1d104cff64fbb6e71661d56961948f213572bac"},
+    {file = "lancedb-0.5.5.tar.gz", hash = "sha256:034ef65d1456ca7f0f9dfc33c93d7cbfa893b45411d218258f1c01a1c069bdbf"},
+]
+
+[package.dependencies]
+attrs = ">=21.3.0"
+cachetools = "*"
+click = ">=8.1.7"
+deprecation = "*"
+overrides = ">=0.7"
+pydantic = ">=1.10"
+pylance = "0.9.15"
+pyyaml = ">=6.0"
+ratelimiter = ">=1.0,<2.0"
+requests = ">=2.31.0"
+retry = ">=0.9.2"
+semver = ">=3.0"
+tqdm = ">=4.27.0"
+
+[package.extras]
+clip = ["open-clip", "pillow", "torch"]
+dev = ["pre-commit", "ruff"]
+docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
+embeddings = ["InstructorEmbedding", "awscli (>=1.29.57)", "boto3 (>=1.28.57)", "botocore (>=1.31.57)", "cohere", "google.generativeai", "huggingface-hub", "open-clip-torch", "openai (>=1.6.1)", "pillow", "sentence-transformers", "torch"]
+tests = ["aiohttp", "duckdb", "pandas (>=1.4)", "polars (>=0.19)", "pytest", "pytest-asyncio", "pytest-mock", "pytz"]
+
 [[package]]
 name = "langchain"
 version = "0.0.338"
@@ -2682,17 +2771,6 @@ files = [
     {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
 ]
 
-[[package]]
-name = "monotonic"
-version = "1.6"
-description = "An implementation of time.monotonic() for Python 2 & < 3.3"
-optional = false
-python-versions = "*"
-files = [
-    {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"},
-    {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"},
-]
-
 [[package]]
 name = "mpmath"
 version = "1.3.0"
@@ -3180,6 +3258,17 @@ files = [
     {file = "orjson-3.9.10.tar.gz", hash = "sha256:9ebbdbd6a046c304b1845e96fbcc5559cd296b4dfd3ad2509e33c4d9ce07d6a1"},
 ]
 
+[[package]]
+name = "overrides"
+version = "7.7.0"
+description = "A decorator to automatically detect mismatch when overriding a method."
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49"},
+    {file = "overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a"},
+]
+
 [[package]]
 name = "packaging"
 version = "23.2"
@@ -3699,6 +3788,65 @@ files = [
     {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
 ]
 
+[[package]]
+name = "py"
+version = "1.11.0"
+description = "library with cross-python path, ini-parsing, io, code, log facilities"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+files = [
+    {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
+    {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
+]
+
+[[package]]
+name = "pyarrow"
+version = "15.0.0"
+description = "Python library for Apache Arrow"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "pyarrow-15.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0a524532fd6dd482edaa563b686d754c70417c2f72742a8c990b322d4c03a15d"},
+    {file = "pyarrow-15.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a6bdb314affa9c2e0d5dddf3d9cbb9ef4a8dddaa68669975287d47ece67642"},
+    {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66958fd1771a4d4b754cd385835e66a3ef6b12611e001d4e5edfcef5f30391e2"},
+    {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f500956a49aadd907eaa21d4fff75f73954605eaa41f61cb94fb008cf2e00c6"},
+    {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6f87d9c4f09e049c2cade559643424da84c43a35068f2a1c4653dc5b1408a929"},
+    {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:85239b9f93278e130d86c0e6bb455dcb66fc3fd891398b9d45ace8799a871a1e"},
+    {file = "pyarrow-15.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b8d43e31ca16aa6e12402fcb1e14352d0d809de70edd185c7650fe80e0769e3"},
+    {file = "pyarrow-15.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:fa7cd198280dbd0c988df525e50e35b5d16873e2cdae2aaaa6363cdb64e3eec5"},
+    {file = "pyarrow-15.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8780b1a29d3c8b21ba6b191305a2a607de2e30dab399776ff0aa09131e266340"},
+    {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0ec198ccc680f6c92723fadcb97b74f07c45ff3fdec9dd765deb04955ccf19"},
+    {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036a7209c235588c2f07477fe75c07e6caced9b7b61bb897c8d4e52c4b5f9555"},
+    {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2bd8a0e5296797faf9a3294e9fa2dc67aa7f10ae2207920dbebb785c77e9dbe5"},
+    {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e8ebed6053dbe76883a822d4e8da36860f479d55a762bd9e70d8494aed87113e"},
+    {file = "pyarrow-15.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d53a9d1b2b5bd7d5e4cd84d018e2a45bc9baaa68f7e6e3ebed45649900ba99"},
+    {file = "pyarrow-15.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9950a9c9df24090d3d558b43b97753b8f5867fb8e521f29876aa021c52fda351"},
+    {file = "pyarrow-15.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:003d680b5e422d0204e7287bb3fa775b332b3fce2996aa69e9adea23f5c8f970"},
+    {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f75fce89dad10c95f4bf590b765e3ae98bcc5ba9f6ce75adb828a334e26a3d40"},
+    {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca9cb0039923bec49b4fe23803807e4ef39576a2bec59c32b11296464623dc2"},
+    {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ed5a78ed29d171d0acc26a305a4b7f83c122d54ff5270810ac23c75813585e4"},
+    {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6eda9e117f0402dfcd3cd6ec9bfee89ac5071c48fc83a84f3075b60efa96747f"},
+    {file = "pyarrow-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a3a6180c0e8f2727e6f1b1c87c72d3254cac909e609f35f22532e4115461177"},
+    {file = "pyarrow-15.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:19a8918045993349b207de72d4576af0191beef03ea655d8bdb13762f0cd6eac"},
+    {file = "pyarrow-15.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0ec076b32bacb6666e8813a22e6e5a7ef1314c8069d4ff345efa6246bc38593"},
+    {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5db1769e5d0a77eb92344c7382d6543bea1164cca3704f84aa44e26c67e320fb"},
+    {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2617e3bf9df2a00020dd1c1c6dce5cc343d979efe10bc401c0632b0eef6ef5b"},
+    {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:d31c1d45060180131caf10f0f698e3a782db333a422038bf7fe01dace18b3a31"},
+    {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c8c287d1d479de8269398b34282e206844abb3208224dbdd7166d580804674b7"},
+    {file = "pyarrow-15.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:07eb7f07dc9ecbb8dace0f58f009d3a29ee58682fcdc91337dfeb51ea618a75b"},
+    {file = "pyarrow-15.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:47af7036f64fce990bb8a5948c04722e4e3ea3e13b1007ef52dfe0aa8f23cf7f"},
+    {file = "pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93768ccfff85cf044c418bfeeafce9a8bb0cee091bd8fd19011aff91e58de540"},
+    {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6ee87fd6892700960d90abb7b17a72a5abb3b64ee0fe8db6c782bcc2d0dc0b4"},
+    {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:001fca027738c5f6be0b7a3159cc7ba16a5c52486db18160909a0831b063c4e4"},
+    {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1c48648f64aec09accf44140dccb92f4f94394b8d79976c426a5b79b11d4fa7"},
+    {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:972a0141be402bb18e3201448c8ae62958c9c7923dfaa3b3d4530c835ac81aed"},
+    {file = "pyarrow-15.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f01fc5cf49081426429127aa2d427d9d98e1cb94a32cb961d583a70b7c4504e6"},
+    {file = "pyarrow-15.0.0.tar.gz", hash = "sha256:876858f549d540898f927eba4ef77cd549ad8d24baa3207cf1b72e5788b50e83"},
+]
+
+[package.dependencies]
+numpy = ">=1.16.6,<2"
+
 [[package]]
 name = "pyasn1"
 version = "0.5.1"
@@ -3905,6 +4053,29 @@ files = [
 plugins = ["importlib-metadata"]
 windows-terminal = ["colorama (>=0.4.6)"]
 
+[[package]]
+name = "pylance"
+version = "0.9.15"
+description = "python wrapper for Lance columnar format"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "pylance-0.9.15-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:8bdc0aac869d1bea2d791cc94883dedc850fcc104b89d44d01e64ec071a277d8"},
+    {file = "pylance-0.9.15-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e594f6c09240acc674bfa6ff38b48230d401d0882cef249a37459de45cfd7ce"},
+    {file = "pylance-0.9.15-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb860bf4d0df25d259b100c009ef2eb02e77c6d4d73be208f81411cd7a50d476"},
+    {file = "pylance-0.9.15-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:3ee0497590c06a2a97d5077ae51cb967f96ada8fe73e85722a7719203fd4be1f"},
+    {file = "pylance-0.9.15-cp38-abi3-win_amd64.whl", hash = "sha256:9bca8d4d82a1d1d390e2b9f9e9380dde26db6559bc4715f5a342e4c7e268d57b"},
+]
+
+[package.dependencies]
+numpy = ">=1.22"
+pyarrow = ">=12"
+
+[package.extras]
+benchmarks = ["pytest-benchmark"]
+tests = ["datasets", "duckdb", "ml_dtypes", "pandas", "pillow", "polars[pandas,pyarrow]", "pytest", "tensorflow", "tqdm"]
+torch = ["torch"]
+
 [[package]]
 name = "pymupdf"
 version = "1.23.8"
@@ -4316,6 +4487,20 @@ files = [
 [package.extras]
 full = ["numpy"]
 
+[[package]]
+name = "ratelimiter"
+version = "1.2.0.post0"
+description = "Simple python rate limiting object"
+optional = false
+python-versions = "*"
+files = [
+    {file = "ratelimiter-1.2.0.post0-py3-none-any.whl", hash = "sha256:a52be07bc0bb0b3674b4b304550f10c769bbb00fead3072e035904474259809f"},
+    {file = "ratelimiter-1.2.0.post0.tar.gz", hash = "sha256:5c395dcabdbbde2e5178ef3f89b568a3066454a6ddc223b76473dac22f89b4f7"},
+]
+
+[package.extras]
+test = ["pytest (>=3.0)", "pytest-asyncio"]
+
 [[package]]
 name = "regex"
 version = "2023.12.25"
@@ -4471,6 +4656,21 @@ files = [
 [package.dependencies]
 types-setuptools = ">=57.0.0"
 
+[[package]]
+name = "retry"
+version = "0.9.2"
+description = "Easy to use retry decorator."
+optional = false
+python-versions = "*"
+files = [
+    {file = "retry-0.9.2-py2.py3-none-any.whl", hash = "sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606"},
+    {file = "retry-0.9.2.tar.gz", hash = "sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4"},
+]
+
+[package.dependencies]
+decorator = ">=3.4.2"
+py = ">=1.4.26,<2.0.0"
+
 [[package]]
 name = "retrying"
 version = "1.3.4"
@@ -4742,26 +4942,6 @@ dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyl
 doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"]
 test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
 
-[[package]]
-name = "segment-analytics-python"
-version = "2.2.3"
-description = "The hassle-free way to integrate analytics into any python application."
-optional = false
-python-versions = ">=3.6.0"
-files = [
-    {file = "segment-analytics-python-2.2.3.tar.gz", hash = "sha256:0df5908e3df74b4482f33392fdd450df4c8351bf54974376fbe6bf33b0700865"},
-    {file = "segment_analytics_python-2.2.3-py2.py3-none-any.whl", hash = "sha256:06cc3d8e79103f02c3878ec66cb66152415473d0d2a142b98a0ee18da972e109"},
-]
-
-[package.dependencies]
-backoff = ">=2.1,<3.0"
-monotonic = ">=1.5,<2.0"
-python-dateutil = ">=2.2,<3.0"
-requests = ">=2.7,<3.0"
-
-[package.extras]
-test = ["flake8 (==3.7.9)", "mock (==2.0.0)", "pylint (==2.8.0)"]
-
 [[package]]
 name = "semver"
 version = "3.0.2"
@@ -6333,4 +6513,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "fd1827c12c55037c200a399460ca9d17da4c6c1326eb275d18d9bb99cfbd60f3"
+content-hash = "671f878d3fc3b864ac68ef553f3f48ac247bfee0ae60540f260fea7fda727e86"
diff --git a/pyproject.toml b/pyproject.toml
index fa7d6174a..50b823923 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,6 +60,8 @@ grpcio = "^1.60.0"
 langdetect = "^1.0.9"
 iso639 = "^0.1.4"
 debugpy = "^1.8.0"
+lancedb = "^0.5.5"
+pyarrow = "^15.0.0"
 
 [build-system]
 requires = ["poetry-core"]
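Note: the new lancedb/pyarrow pins can be sanity-checked after `poetry install` with a quick local round trip (the path is illustrative; lancedb pulls in pylance 0.9.15 and pyarrow >=12 per the lock entries above):

    import lancedb

    db = lancedb.connect("/tmp/lancedb-smoke")
    table = db.create_table("smoke", data=[{"vector": [1.0, 0.0], "text": "hello"}])
    print(table.search([1.0, 0.0]).limit(1).to_pandas()["text"].tolist())  # ['hello']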