* Update cognify and the networkx client to prepare for running in Neo4j
* Fix for OpenAI model
* Add the fix to the infra so that the models can be passed to the library; enable llm_provider to be passed
* Auto graph generation now works with Neo4j
* Added fixes for both Neo4j and networkx
* Explicitly name semantic node connections
* Added updated docs, README, chunkers and updates to cognify
* Make docs build trigger only when changes to it happen
* Update docs, test git actions
* Separate cognify logic into tasks
* Introduce dspy knowledge graph extraction

---------

Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
43 lines
1.2 KiB
Python
43 lines
1.2 KiB
Python
from typing import TypedDict
|
|
from uuid import uuid4
|
|
from cognee.infrastructure import infrastructure_config
|
|
from cognee.infrastructure.databases.vector import DataPoint
|
|
|
|
class TextChunk(TypedDict):
    """One chunk of text carved out of a source document.

    Keys:
        text: the raw text content of the chunk.
        file_metadata: metadata describing the file the chunk came from
            (schema defined by the caller — not constrained here).
    """
    text: str
    file_metadata: dict
|
|
|
|
async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]):
    """Persist text chunks into the vector store, one collection per dataset.

    For every dataset, ensures a collection exists, assigns each chunk a
    fresh UUID, and writes the chunks as data points embedded on their
    ``text`` field.

    Args:
        dataset_data_chunks: mapping of dataset name to the chunks that
            belong to it.

    Returns:
        A flat list of dicts (``id``, ``collection``, ``text``,
        ``file_metadata``) for every chunk stored, across all datasets.
    """
    vector_client = infrastructure_config.get_config("vector_engine")

    stored_chunks = []

    for dataset_name, chunks in dataset_data_chunks.items():
        # Best-effort create: the collection may already exist, in which
        # case the engine raises and we simply proceed.
        try:
            await vector_client.create_collection(dataset_name)
        except Exception:
            pass

        # Tag each incoming chunk with a generated id and its collection.
        dataset_chunks = []
        for chunk in chunks:
            dataset_chunks.append(dict(
                id = str(uuid4()),
                collection = dataset_name,
                text = chunk["text"],
                file_metadata = chunk["file_metadata"],
            ))

        stored_chunks.extend(dataset_chunks)

        # Only the text goes into the vector payload; file_metadata is
        # returned to the caller but not stored in the vector engine.
        data_points = [
            DataPoint(
                id = chunk["id"],
                payload = dict(text = chunk["text"]),
                embed_field = "text"
            )
            for chunk in dataset_chunks
        ]

        await vector_client.create_data_points(dataset_name, data_points)

    return stored_chunks
|