fix: cognify status table update
This commit is contained in:
parent
245ed66c9e
commit
c9d9672fed
9 changed files with 41 additions and 28 deletions
|
|
@ -59,7 +59,7 @@ async def add_files(file_paths: List[str], dataset_name: str):
|
|||
|
||||
if data_directory_path not in file_path:
|
||||
file_name = file_path.split("/")[-1]
|
||||
file_directory_path = data_directory_path + "/" + (dataset_name.replace(".", "/") + "/" if dataset_name != "root" else "")
|
||||
file_directory_path = data_directory_path + "/" + (dataset_name.replace(".", "/") + "/" if dataset_name != "main_dataset" else "")
|
||||
dataset_file_path = path.join(file_directory_path, file_name)
|
||||
|
||||
LocalStorage.ensure_directory_exists(file_directory_path)
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ class DuckDBAdapter():
|
|||
|
||||
return list(
|
||||
filter(
|
||||
lambda table_name: table_name.endswith("staging") is False,
|
||||
lambda schema_name: not schema_name.endswith("staging") and schema_name != "cognee",
|
||||
tables["schema_name"]
|
||||
)
|
||||
)
|
||||
|
|
@ -22,14 +22,18 @@ class DuckDBAdapter():
|
|||
with self.get_connection() as connection:
|
||||
return connection.sql(f"SELECT id, name, file_path, extension, mime_type, keywords FROM {dataset_name}.file_metadata;").to_df().to_dict("records")
|
||||
|
||||
def create_table(self, table_name: str, table_config: list[dict]):
|
||||
def create_table(self, schema_name: str, table_name: str, table_config: list[dict]):
|
||||
fields_query_parts = []
|
||||
|
||||
for table_config_item in table_config:
|
||||
fields_query_parts.append(f"{table_config_item['name']} {table_config_item['type']}")
|
||||
|
||||
with self.get_connection() as connection:
|
||||
query = f"CREATE TABLE IF NOT EXISTS {table_name} ({', '.join(fields_query_parts)});"
|
||||
query = f"CREATE SCHEMA IF NOT EXISTS {schema_name};"
|
||||
connection.execute(query)
|
||||
|
||||
with self.get_connection() as connection:
|
||||
query = f"CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ({', '.join(fields_query_parts)});"
|
||||
connection.execute(query)
|
||||
|
||||
def delete_table(self, table_name: str):
|
||||
|
|
@ -37,7 +41,7 @@ class DuckDBAdapter():
|
|||
query = f"DROP TABLE IF EXISTS {table_name};"
|
||||
connection.execute(query)
|
||||
|
||||
def insert_data(self, table_name: str, data: list[dict]):
|
||||
def insert_data(self, schema_name: str, table_name: str, data: list[dict]):
|
||||
def get_values(data_entry: list):
|
||||
return ", ".join([f"'{value}'" if isinstance(value, str) else value for value in data_entry])
|
||||
|
||||
|
|
@ -45,7 +49,7 @@ class DuckDBAdapter():
|
|||
values = ", ".join([f"({get_values(data_entry.values())})" for data_entry in data])
|
||||
|
||||
with self.get_connection() as connection:
|
||||
query = f"INSERT INTO {table_name} ({columns}) VALUES {values};"
|
||||
query = f"INSERT INTO {schema_name}.{table_name} ({columns}) VALUES {values};"
|
||||
connection.execute(query)
|
||||
|
||||
def get_data(self, table_name: str, filters: dict = None):
|
||||
|
|
|
|||
|
|
@ -1,18 +1,17 @@
|
|||
import asyncio
|
||||
from typing import List, Optional
|
||||
from openai import AsyncOpenAI
|
||||
from fastembed import TextEmbedding
|
||||
|
||||
import litellm
|
||||
from litellm import aembedding
|
||||
from cognee.root_dir import get_absolute_path
|
||||
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
|
||||
from litellm import aembedding
|
||||
import litellm
|
||||
|
||||
litellm.set_verbose = True
|
||||
|
||||
class DefaultEmbeddingEngine(EmbeddingEngine):
|
||||
embedding_model: str
|
||||
embedding_dimensions: int
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
embedding_model: Optional[str],
|
||||
|
|
@ -34,6 +33,7 @@ class DefaultEmbeddingEngine(EmbeddingEngine):
|
|||
class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
||||
embedding_model: str
|
||||
embedding_dimensions: int
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
embedding_model: Optional[str],
|
||||
|
|
@ -41,8 +41,6 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
|||
):
|
||||
self.embedding_model = embedding_model
|
||||
self.embedding_dimensions = embedding_dimensions
|
||||
import asyncio
|
||||
from typing import List
|
||||
|
||||
async def embed_text(self, text: List[str]) -> List[List[float]]:
|
||||
async def get_embedding(text_):
|
||||
|
|
@ -52,19 +50,20 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
|
|||
tasks = [get_embedding(text_) for text_ in text]
|
||||
result = await asyncio.gather(*tasks)
|
||||
return result
|
||||
|
||||
def get_vector_size(self) -> int:
|
||||
return self.embedding_dimensions
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
async def gg():
|
||||
openai_embedding_engine = LiteLLMEmbeddingEngine()
|
||||
# print(openai_embedding_engine.embed_text(["Hello, how are you?"]))
|
||||
# print(openai_embedding_engine.get_vector_size())
|
||||
# default_embedding_engine = DefaultEmbeddingEngine()
|
||||
sds = await openai_embedding_engine.embed_text(["Hello, sadasdas are you?"])
|
||||
print(sds)
|
||||
# print(default_embedding_engine.get_vector_size())
|
||||
# if __name__ == "__main__":
|
||||
# async def gg():
|
||||
# openai_embedding_engine = LiteLLMEmbeddingEngine()
|
||||
# # print(openai_embedding_engine.embed_text(["Hello, how are you?"]))
|
||||
# # print(openai_embedding_engine.get_vector_size())
|
||||
# # default_embedding_engine = DefaultEmbeddingEngine()
|
||||
# sds = await openai_embedding_engine.embed_text(["Hello, sadasdas are you?"])
|
||||
# print(sds)
|
||||
# # print(default_embedding_engine.get_vector_size())
|
||||
|
||||
asyncio.run(gg())
|
||||
# asyncio.run(gg())
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
from typing import Optional
|
||||
from functools import lru_cache
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
|
@ -5,7 +6,7 @@ class LLMConfig(BaseSettings):
|
|||
llm_provider: str = "openai"
|
||||
llm_model: str = "gpt-4o"
|
||||
llm_endpoint: str = ""
|
||||
llm_api_key: str = ""
|
||||
llm_api_key: Optional[str] = None
|
||||
llm_temperature: float = 0.0
|
||||
llm_streaming: bool = False
|
||||
|
||||
|
|
|
|||
|
|
@ -16,15 +16,24 @@ def get_llm_client():
|
|||
provider = LLMProvider(llm_config.llm_provider)
|
||||
|
||||
if provider == LLMProvider.OPENAI:
|
||||
if llm_config.llm_api_key is None:
|
||||
raise ValueError("LLM API key is not set.")
|
||||
|
||||
from .openai.adapter import OpenAIAdapter
|
||||
return OpenAIAdapter(llm_config.llm_api_key, llm_config.llm_model, llm_config.llm_streaming)
|
||||
elif provider == LLMProvider.OLLAMA:
|
||||
if llm_config.llm_api_key is None:
|
||||
raise ValueError("LLM API key is not set.")
|
||||
|
||||
from .generic_llm_api.adapter import GenericAPIAdapter
|
||||
return GenericAPIAdapter(llm_config.llm_endpoint, llm_config.llm_api_key, llm_config.llm_model, "Ollama")
|
||||
elif provider == LLMProvider.ANTHROPIC:
|
||||
from .anthropic.adapter import AnthropicAdapter
|
||||
return AnthropicAdapter(llm_config.llm_model)
|
||||
elif provider == LLMProvider.CUSTOM:
|
||||
if llm_config.llm_api_key is None:
|
||||
raise ValueError("LLM API key is not set.")
|
||||
|
||||
from .generic_llm_api.adapter import GenericAPIAdapter
|
||||
return GenericAPIAdapter(llm_config.llm_endpoint, llm_config.llm_api_key, llm_config.llm_model, "Custom")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ def create_task_status_table():
|
|||
config = get_relationaldb_config()
|
||||
db_engine = config.database_engine
|
||||
|
||||
db_engine.create_table("cognee_task_status", [
|
||||
db_engine.create_table("cognee.cognee", "cognee_task_status", [
|
||||
dict(name = "data_id", type = "STRING"),
|
||||
dict(name = "status", type = "STRING"),
|
||||
dict(name = "created_at", type = "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"),
|
||||
|
|
|
|||
|
|
@ -10,10 +10,10 @@ def get_task_status(data_ids: [str]):
|
|||
f"""SELECT data_id, status
|
||||
FROM (
|
||||
SELECT data_id, status, ROW_NUMBER() OVER (PARTITION BY data_id ORDER BY created_at DESC) as rn
|
||||
FROM cognee_task_status
|
||||
FROM cognee.cognee.cognee_task_status
|
||||
WHERE data_id IN ({formatted_data_ids})
|
||||
) t
|
||||
WHERE rn = 1;"""
|
||||
)
|
||||
|
||||
return results
|
||||
return results[0] if len(results) > 0 else None
|
||||
|
|
|
|||
|
|
@ -3,4 +3,4 @@ from cognee.infrastructure.databases.relational.config import get_relationaldb_c
|
|||
def update_task_status(data_id: str, status: str):
|
||||
config = get_relationaldb_config()
|
||||
db_engine = config.database_engine
|
||||
db_engine.insert_data("cognee_task_status", [dict(data_id = data_id, status = status)])
|
||||
db_engine.insert_data("cognee.cognee", "cognee_task_status", [dict(data_id = data_id, status = status)])
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "cognee"
|
||||
version = "0.1.9"
|
||||
version = "0.1.11"
|
||||
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
||||
authors = ["Vasilije Markovic", "Boris Arzentar"]
|
||||
readme = "README.md"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue