cognee/cognee/infrastructure/databases/vector/embeddings/DefaultEmbeddingEngine.py
Vasilije bb679c2dd7
Improve processing; update the networkx client, Neo4j, and dspy (#69)
* Update cognify and the networkx client to prepare for running in Neo4j

* Fix for openai model

* Add the fix to the infra so that the models can be passed to the library. Enable llm_provider to be passed.

* Auto graph generation now works with neo4j

* Added fixes for both neo4j and networkx

* Explicitly name semantic node connections

* Added updated docs, readme, chunkers and updates to cognify

* Make docs build trigger only when changes on it happen

* Update docs, test git actions

* Separate cognify logic into tasks

* Introduce dspy knowledge graph extraction

---------
Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
2024-04-20 19:05:40 +02:00

42 lines
1.4 KiB
Python

from typing import List
import instructor
from openai import AsyncOpenAI
from fastembed import TextEmbedding
from fastembed import TextEmbedding
from openai import AsyncOpenAI
from cognee.config import Config
from cognee.root_dir import get_absolute_path
from .EmbeddingEngine import EmbeddingEngine
config = Config()
config.load()
class DefaultEmbeddingEngine(EmbeddingEngine):
async def embed_text(self, text: List[str]) -> List[float]:
embedding_model = TextEmbedding(model_name = config.embedding_model, cache_dir = get_absolute_path("cache/embeddings"))
embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text)))
return embeddings_list
def get_vector_size(self) -> int:
return config.embedding_dimensions
class OpenAIEmbeddingEngine(EmbeddingEngine):
async def embed_text(self, text: List[str]) -> List[float]:
OPENAI_API_KEY = config.openai_key
aclient = instructor.patch(AsyncOpenAI())
text = text.replace("\n", " ")
response = await aclient.embeddings.create(input = text, model = config.openai_embedding_model)
embedding = response.data[0].embedding
# embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text)))
return embedding
def get_vector_size(self) -> int:
return config.openai_embedding_dimensions