Initial commit, still wip
This commit is contained in:
parent
1f49ca265f
commit
813ee94836
5 changed files with 51 additions and 7 deletions
|
|
@ -57,19 +57,34 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
|
||||||
|
|
||||||
# Set vector and graph database configuration based on dataset database information
|
# Set vector and graph database configuration based on dataset database information
|
||||||
vector_config = {
|
vector_config = {
|
||||||
"vector_db_url": os.path.join(
|
"vector_db_provider": dataset_database.vector_database_provider,
|
||||||
databases_directory_path, dataset_database.vector_database_name
|
"vector_db_url": dataset_database.vector_database_url,
|
||||||
),
|
# TODO: Maybe add a key to dataset_database and use it here?
|
||||||
"vector_db_key": "",
|
"vector_db_key": ""
|
||||||
"vector_db_provider": "lancedb",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# vector_config = {
|
||||||
|
# "vector_db_url": os.path.join(
|
||||||
|
# databases_directory_path, dataset_database.vector_database_name
|
||||||
|
# ),
|
||||||
|
# "vector_db_key": "",
|
||||||
|
# "vector_db_provider": "lancedb",
|
||||||
|
# }
|
||||||
|
|
||||||
graph_config = {
|
graph_config = {
|
||||||
"graph_database_provider": "kuzu",
|
"graph_database_provider": dataset_database.graph_database_provider,
|
||||||
|
"graph_database_url": dataset_database.graph_database_url,
|
||||||
|
"graph_database_name": dataset_database.graph_database_name,
|
||||||
"graph_file_path": os.path.join(
|
"graph_file_path": os.path.join(
|
||||||
databases_directory_path, dataset_database.graph_database_name
|
databases_directory_path, dataset_database.graph_database_name
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
# graph_config = {
|
||||||
|
# "graph_database_provider": "kuzu",
|
||||||
|
# "graph_file_path": os.path.join(
|
||||||
|
# databases_directory_path, dataset_database.graph_database_name
|
||||||
|
# ),
|
||||||
|
# }
|
||||||
|
|
||||||
storage_config = {
|
storage_config = {
|
||||||
"data_root_directory": data_root_directory,
|
"data_root_directory": data_root_directory,
|
||||||
|
|
|
||||||
|
|
@ -69,6 +69,7 @@ def create_graph_engine(
|
||||||
graph_database_url=graph_database_url,
|
graph_database_url=graph_database_url,
|
||||||
graph_database_username=graph_database_username,
|
graph_database_username=graph_database_username,
|
||||||
graph_database_password=graph_database_password,
|
graph_database_password=graph_database_password,
|
||||||
|
graph_name=graph_database_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
if graph_database_provider == "neo4j":
|
if graph_database_provider == "neo4j":
|
||||||
|
|
|
||||||
|
|
@ -6,11 +6,20 @@ from sqlalchemy.exc import IntegrityError
|
||||||
from cognee.modules.data.methods import create_dataset
|
from cognee.modules.data.methods import create_dataset
|
||||||
|
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
from cognee.infrastructure.databases.vector import get_vectordb_config
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
from cognee.modules.data.methods import get_unique_dataset_id
|
from cognee.modules.data.methods import get_unique_dataset_id
|
||||||
from cognee.modules.users.models import DatasetDatabase
|
from cognee.modules.users.models import DatasetDatabase
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Find a better place to define these
|
||||||
|
default_vector_db_name = "lance.db"
|
||||||
|
default_vector_db_provider = "lancedb"
|
||||||
|
default_graph_db_provider = "kuzu"
|
||||||
|
default_vector_db_url = None
|
||||||
|
default_graph_db_url = None
|
||||||
|
|
||||||
async def get_or_create_dataset_database(
|
async def get_or_create_dataset_database(
|
||||||
dataset: Union[str, UUID],
|
dataset: Union[str, UUID],
|
||||||
user: User,
|
user: User,
|
||||||
|
|
@ -32,9 +41,12 @@ async def get_or_create_dataset_database(
|
||||||
|
|
||||||
dataset_id = await get_unique_dataset_id(dataset, user)
|
dataset_id = await get_unique_dataset_id(dataset, user)
|
||||||
|
|
||||||
vector_db_name = f"{dataset_id}.lance.db"
|
vector_db_name = f"{dataset_id}.db"
|
||||||
graph_db_name = f"{dataset_id}.pkl"
|
graph_db_name = f"{dataset_id}.pkl"
|
||||||
|
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
|
||||||
async with db_engine.get_async_session() as session:
|
async with db_engine.get_async_session() as session:
|
||||||
# Create dataset if it doesn't exist
|
# Create dataset if it doesn't exist
|
||||||
if isinstance(dataset, str):
|
if isinstance(dataset, str):
|
||||||
|
|
@ -49,12 +61,19 @@ async def get_or_create_dataset_database(
|
||||||
if existing:
|
if existing:
|
||||||
return existing
|
return existing
|
||||||
|
|
||||||
|
# TODO: Set the vector and graph database stuff (name, provider, etc.) based on whether or
|
||||||
|
# TODO: not we support multi-user for that db. If not, set to the defaults, which are lance and/or kuzu.
|
||||||
|
|
||||||
# If there are no existing rows build a new row
|
# If there are no existing rows build a new row
|
||||||
record = DatasetDatabase(
|
record = DatasetDatabase(
|
||||||
owner_id=user.id,
|
owner_id=user.id,
|
||||||
dataset_id=dataset_id,
|
dataset_id=dataset_id,
|
||||||
vector_database_name=vector_db_name,
|
vector_database_name=vector_db_name,
|
||||||
graph_database_name=graph_db_name,
|
graph_database_name=graph_db_name,
|
||||||
|
vector_database_provider=vector_config.vector_db_provider,
|
||||||
|
graph_database_provider=graph_config.graph_database_provider,
|
||||||
|
vector_database_url=vector_config.vector_db_url,
|
||||||
|
graph_database_url=graph_config.graph_database_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
from .supported_databases import supported_databases
|
from .supported_databases import supported_databases
|
||||||
from .embeddings import get_embedding_engine
|
from .embeddings import get_embedding_engine
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
|
||||||
|
|
@ -45,6 +46,7 @@ def create_vector_engine(
|
||||||
url=vector_db_url,
|
url=vector_db_url,
|
||||||
api_key=vector_db_key,
|
api_key=vector_db_key,
|
||||||
embedding_engine=embedding_engine,
|
embedding_engine=embedding_engine,
|
||||||
|
graph_name=get_graph_config().graph_database_name
|
||||||
)
|
)
|
||||||
|
|
||||||
if vector_db_provider == "pgvector":
|
if vector_db_provider == "pgvector":
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,15 @@ class DatasetDatabase(Base):
|
||||||
UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True
|
UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# TODO: Why is this unique? Isn't it a fact that two or more datasets can share the same vector and graph store?
|
||||||
vector_database_name = Column(String, unique=True, nullable=False)
|
vector_database_name = Column(String, unique=True, nullable=False)
|
||||||
graph_database_name = Column(String, unique=True, nullable=False)
|
graph_database_name = Column(String, unique=True, nullable=False)
|
||||||
|
|
||||||
|
vector_database_provider = Column(String, unique=True, nullable=False)
|
||||||
|
graph_database_provider = Column(String, unique=True, nullable=False)
|
||||||
|
|
||||||
|
vector_database_url = Column(String, unique=True, nullable=True)
|
||||||
|
graph_database_url = Column(String, unique=True, nullable=True)
|
||||||
|
|
||||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||||
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
|
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue