Add evals for cognee
parent bd42cebd8b · commit 2657aa7096
8 changed files with 199 additions and 12 deletions
@@ -12,7 +12,7 @@ from cognee.api.v1.prune import prune
from cognee.config import Config
from cognee.infrastructure.data.chunking.LangchainChunkingEngine import LangchainChunkEngine
from cognee.infrastructure.databases.vector.embeddings.DefaultEmbeddingEngine import LiteLLMEmbeddingEngine
-from cognee.modules.cognify.graph.add_data_chunks import add_data_chunks
+from cognee.modules.cognify.graph.add_data_chunks import add_data_chunks, add_data_chunks_basic_rag
from cognee.modules.cognify.graph.add_document_node import add_document_node
from cognee.modules.cognify.graph.add_classification_nodes import add_classification_nodes
from cognee.modules.cognify.graph.add_cognitive_layer_graphs import add_cognitive_layer_graphs

@@ -80,7 +80,6 @@ async def cognify(datasets: Union[str, List[str]] = None):
    if dataset_name in added_dataset:
        dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset)))

    # print("dataset_files", dataset_files)

    data_chunks = {}

@@ -109,13 +108,14 @@ async def cognify(datasets: Union[str, List[str]] = None):
            logger.warning("File (%s) has an unknown file type. We are skipping it.", file_metadata["id"])

    added_chunks = await add_data_chunks(data_chunks)
    added_basic_rag_chunks = await add_data_chunks_basic_rag(data_chunks)

    await asyncio.gather(
        *[process_text(chunk["collection"], chunk["chunk_id"], chunk["text"], chunk["file_metadata"], chunk["document_id"]) for chunk in added_chunks]
    )

    # await asyncio.gather(
    #     *[process_text(chunk["collection"], chunk["chunk_id"], chunk["text"], chunk["file_metadata"], chunk["document_id"]) for chunk in added_chunks]
    # )

    batch_size = 20
    file_count = 0
    files_batch = []
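
For orientation, each chunk dict that process_text receives here carries the five keys assembled in add_data_chunks / add_data_chunks_basic_rag below; a hypothetical example of that shape (all values made up):

example_chunk = dict(
    chunk_id = "chunk-0001",             # hypothetical id
    collection = "basic_rag",
    text = "Natural language processing (NLP) is ...",
    document_id = "doc-0001",            # hypothetical id
    file_metadata = {"id": "doc-0001"},  # assumed minimal shape; the real dict comes from db_engine.get_files_metadata
)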

@@ -260,12 +260,16 @@ if __name__ == "__main__":

        config.set_graph_model(SourceCodeGraph)
        config.set_classification_model(CodeContentPrediction)

        graph = await cognify()

        from cognee.utils import render_graph
        vector_client = infrastructure_config.get_config("vector_engine")

        await render_graph(graph, include_color=True, include_nodes=False, include_size=False)
        out = await vector_client.search(collection_name="basic_rag", query_text="show_all_processes", limit=10)

        print("results", out)

        # from cognee.utils import render_graph
        # await render_graph(graph, include_color=True, include_nodes=False, include_size=False)

    import asyncio
    asyncio.run(test())

@@ -52,3 +52,45 @@ async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]):
    )

    return identified_chunks
+
+
+async def add_data_chunks_basic_rag(dataset_data_chunks: dict[str, list[TextChunk]]):
+    vector_client = infrastructure_config.get_config("vector_engine")
+
+    identified_chunks = []
+
+    class PayloadSchema(BaseModel):
+        text: str = Field(...)
+
+    for (dataset_name, chunks) in dataset_data_chunks.items():
+        try:
+            await vector_client.create_collection("basic_rag", payload_schema = PayloadSchema)
+        except Exception as error:
+            # Most likely the collection already exists; log and keep going.
+            print(error)
+
+        dataset_chunks = [
+            dict(
+                chunk_id = chunk["chunk_id"],
+                collection = "basic_rag",
+                text = chunk["text"],
+                document_id = chunk["document_id"],
+                file_metadata = chunk["file_metadata"],
+            ) for chunk in chunks
+        ]
+
+        identified_chunks.extend(dataset_chunks)
+
+        await vector_client.create_data_points(
+            "basic_rag",
+            [
+                DataPoint[PayloadSchema](
+                    id = chunk["chunk_id"],
+                    payload = PayloadSchema.parse_obj(dict(text = chunk["text"])),
+                    embed_field = "text",
+                ) for chunk in dataset_chunks
+            ],
+        )
+
+    return identified_chunks
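
As a usage note, a minimal sketch of querying the freshly populated basic_rag collection, reusing only the vector-engine calls that already appear in this commit (the query string is just an example):

import asyncio

from cognee.infrastructure import infrastructure_config

async def query_basic_rag(query: str, limit: int = 5):
    # add_data_chunks_basic_rag above creates and fills the "basic_rag" collection.
    vector_client = infrastructure_config.get_config("vector_engine")
    return await vector_client.search(collection_name = "basic_rag", query_text = query, limit = limit)

results = asyncio.run(query_basic_rag("show_all_processes"))
print("results", results)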
evals/generate_test_set.py (new file, 46 lines)
@@ -0,0 +1,46 @@
from deepeval.dataset import EvaluationDataset
from deepeval.synthesizer import Synthesizer
import dotenv
from deepeval.test_case import LLMTestCase

dotenv.load_dotenv()

# synthesizer = Synthesizer()
# synthesizer.generate_goldens_from_docs(
#     document_paths=['natural_language_processing.txt', 'soldiers_home.pdf', 'trump.txt'],
#     max_goldens_per_document=5,
#     num_evolutions=5,
#     include_expected_output=True,
#     enable_breadth_evolve=True,
# )
#
# synthesizer.save_as(
#     file_type='json',  # or 'csv'
#     directory="./synthetic_data"
# )

dataset = EvaluationDataset()
dataset.generate_goldens_from_docs(
    document_paths=['soldiers_home.pdf'],
    max_goldens_per_document=10
)

print(dataset.goldens)
print(dataset)

import pytest
from deepeval import assert_test
from deepeval.metrics import AnswerRelevancyMetric

answer_relevancy_metric = AnswerRelevancyMetric(threshold=0.5)

from deepeval import evaluate

# evaluate(dataset, [answer_relevancy_metric])
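
For completeness, the commented-out Synthesizer flow above is what would produce the JSON that simple_rag_vs_cognee_eval.py later loads from ./synthetic_data; a minimal sketch of that generate-and-save step, using only the calls already shown in this file:

import dotenv
from deepeval.synthesizer import Synthesizer

dotenv.load_dotenv()

# Generate goldens from the documents added in this commit and persist
# them as JSON for the eval script to load later.
synthesizer = Synthesizer()
synthesizer.generate_goldens_from_docs(
    document_paths=['natural_language_processing.txt', 'soldiers_home.pdf', 'trump.txt'],
    max_goldens_per_document=5,
)
synthesizer.save_as(file_type='json', directory="./synthetic_data")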
evals/natural_language_processing.txt (new file, 2 lines)

@@ -0,0 +1,2 @@
Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval. It is primarily concerned with giving computers the ability to support and manipulate human language. It involves processing natural language datasets, such as text corpora or speech corpora, using either rule-based or probabilistic (i.e. statistical and, most recently, neural network-based) machine learning approaches. The goal is a computer capable of "understanding" the contents of documents, including the contextual nuances of the language within them. To this end, natural language processing often borrows ideas from theoretical linguistics. The technology can then accurately extract information and insights contained in the documents as well as categorize and organize the documents themselves.
Challenges in natural language processing frequently involve speech recognition, natural-language understanding, and natural-language generation.
evals/simple_rag_vs_cognee_eval.py (new file, 75 lines)
@@ -0,0 +1,75 @@
from deepeval.dataset import EvaluationDataset
from pydantic import BaseModel

from typing import List, Type
from deepeval.test_case import LLMTestCase
from deepeval.dataset import Golden
import dotenv
dotenv.load_dotenv()

from cognee.infrastructure.llm.get_llm_client import get_llm_client

dataset = EvaluationDataset()
dataset.add_test_cases_from_json_file(
    # file_path is the absolute path to your .json file
    file_path="synthetic_data/20240519_185842.json",
    input_key_name="query",
    actual_output_key_name="actual_output",
    expected_output_key_name="expected_output",
    context_key_name="context",
    retrieval_context_key_name="retrieval_context",
)

import logging
from typing import List, Dict
from cognee.infrastructure import infrastructure_config

logger = logging.getLogger(__name__)

class AnswerModel(BaseModel):
    response: str

def get_answer_base(content: str, response_model: Type[BaseModel]):
    llm_client = get_llm_client()

    system_prompt = "Answer the following question and use the context."

    return llm_client.create_structured_output(content, system_prompt, response_model)

def get_answer(content: str, model: Type[BaseModel] = AnswerModel):
    try:
        return get_answer_base(content, model)
    except Exception as error:
        logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)
        raise error

def convert_goldens_to_test_cases(goldens: List[Golden]) -> List[LLMTestCase]:
    test_cases = []
    for golden in goldens:
        test_case = LLMTestCase(
            input=golden.input,
            # Generate actual output using the 'input' and 'additional_metadata'
            actual_output=get_answer(golden.input),
            expected_output=golden.expected_output,
            context=golden.context,
        )
        test_cases.append(test_case)
    return test_cases

# Data preprocessing before setting the dataset test cases
dataset.test_cases = convert_goldens_to_test_cases(dataset.goldens)

from deepeval.metrics import HallucinationMetric

metric = HallucinationMetric()
dataset.evaluate([metric])
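
dataset.evaluate scores the whole dataset in one call; an alternative sketch, assuming deepeval's pytest integration is available (the imports mirror generate_test_set.py), asserts each converted test case individually:

import pytest
from deepeval import assert_test
from deepeval.metrics import AnswerRelevancyMetric

@pytest.mark.parametrize("test_case", dataset.test_cases)
def test_eval_dataset(test_case):
    # Fails the individual case if answer relevancy drops below the 0.5 threshold.
    assert_test(test_case, [AnswerRelevancyMetric(threshold=0.5)])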
evals/soldiers_home.pdf (new binary file; not shown)
evals/trump.txt (new file, 15 lines)

@@ -0,0 +1,15 @@
Donald Trump flirted with the idea of being president for three terms – a clear violation of the US constitution – during a bombastic speech for the National Rifle Association in which he vowed to reverse gun safety measures green-lighted during the Biden administration.

“You know, FDR 16 years – almost 16 years – he was four terms. I don’t know, are we going to be considered three-term? Or two-term?” the ex-president and GOP presidential frontrunner said to the organization’s annual convention in Dallas, prompting some in the crowd to yell “three!” Politico reported.

Trump has floated a third term in past comments, even mentioning a prolonged presidency while campaigning in 2020. He has also tried distancing himself from this idea, telling Time magazine in April: “I wouldn’t be in favor of it at all. I intend to serve four years and do a great job.”

The 22nd amendment, which was enacted following Franklin Delano Roosevelt’s fourth term, limits the presidency to two terms.

In his speech to the NRA, Trump spoke on abortion and immigration, and criticized Robert F Kennedy Jr as being part of the “radical left”. He also complained about the multiple criminal cases against him, including a gag order that bars him from commenting about witnesses in his ongoing New York City criminal trial.

Trump has the NRA’s endorsement, but the organization has recently been reeling from legal and financial woes and is not quite the force in US politics it once was.

The NRA is holding its convention less than three months after its former long-serving leader Wayne LaPierre – as well as other executives of the group – were held liable in a lawsuit centered on the organization’s lavish spending.

Trump, who said he heard that gun owners “don’t vote,” pushed NRA members to hit the polls in November: “Let’s be rebellious and vote this time, OK?”

pyproject.toml

@@ -38,7 +38,7 @@ greenlet = "^3.0.3"
ruff = "^0.2.2"
filetype = "^1.2.0"
nltk = "^3.8.1"
-dlt = "^0.4.7"
+dlt = "0.4.10"
duckdb = {version = "^0.10.0", extras = ["dlt"]}
overrides = "^7.7.0"
aiofiles = "^23.2.1"

@@ -68,6 +68,9 @@ tantivy = "^0.21.0"
langfuse = "^2.32.0"
spacy = "^3.7.4"
protobuf = "<5.0.0"
+langchain-community = "0.0.38"
+langchain = "0.1.10"
+deepeval = "^0.21.42"

[tool.poetry.extras]