diff --git a/examples/data/scientific_papers/TOJ-22-0073_152Mendoza.pdf b/examples/data/scientific_papers/TOJ-22-0073_152Mendoza.pdf new file mode 100644 index 000000000..beabe9a17 Binary files /dev/null and b/examples/data/scientific_papers/TOJ-22-0073_152Mendoza.pdf differ diff --git a/examples/data/scientific_papers/nutrients-13-01241.pdf b/examples/data/scientific_papers/nutrients-13-01241.pdf new file mode 100644 index 000000000..e2405bb59 Binary files /dev/null and b/examples/data/scientific_papers/nutrients-13-01241.pdf differ diff --git a/examples/python/ontology_demo_example_2.py b/examples/python/ontology_demo_example_2.py new file mode 100644 index 000000000..084762a77 --- /dev/null +++ b/examples/python/ontology_demo_example_2.py @@ -0,0 +1,102 @@ +import cognee +import asyncio +from cognee.shared.logging_utils import get_logger +import os +import textwrap +from cognee.api.v1.search import SearchType +from cognee.api.v1.visualize.visualize import visualize_graph + + +async def run_pipeline(ontology_path=None): + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + scientific_papers_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data", "scientific_papers/" + ) + + await cognee.add(scientific_papers_dir) + + pipeline_run = await cognee.cognify(ontology_file_path=ontology_path) + + return pipeline_run + + +async def query_pipeline(questions): + answers = [] + for question in questions: + search_results = await cognee.search( + query_type=SearchType.GRAPH_COMPLETION, + query_text=question, + ) + answers.append(search_results) + + return answers + + +def print_comparison_table(questions, answers_with, answers_without, col_width=45): + separator = "-" * (col_width * 3 + 6) + + header = f"{'Question'.ljust(col_width)} | {'WITH Ontology (owl grounded facts)'.ljust(col_width)} | {'WITHOUT Ontology'.ljust(col_width)}" + logger.info(separator) + logger.info(header) + logger.info(separator) + + for q, with_o, without_o in zip(questions, answers_with, answers_without): + q_wrapped = textwrap.fill(q, width=col_width) + with_o_wrapped = textwrap.fill(str(with_o), width=col_width) + without_o_wrapped = textwrap.fill(str(without_o), width=col_width) + + q_lines = q_wrapped.split("\n") + with_lines = with_o_wrapped.split("\n") + without_lines = without_o_wrapped.split("\n") + + max_lines = max(len(q_lines), len(with_lines), len(without_lines)) + + for i in range(max_lines): + q_line = q_lines[i] if i < len(q_lines) else "" + with_line = with_lines[i] if i < len(with_lines) else "" + without_line = without_lines[i] if i < len(without_lines) else "" + logger.info( + f"{q_line.ljust(col_width)} | {with_line.ljust(col_width)} | {without_line.ljust(col_width)}" + ) + + logger.info(separator) + + +async def main(): + questions = [ + "What are common risk factors for Type 2 Diabetes?", + "What preventive measures reduce the risk of Hypertension?", + "What symptoms indicate possible Cardiovascular Disease?", + "I have blurred vision and a headache. What diease do I have?", + "What diseases are associated with Obesity?", + ] + + ontology_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "ontology_input_example/enriched_medical_ontology_with_classes.owl", + ) + + logger.info("\n--- Generating answers WITH ontology ---\n") + await run_pipeline(ontology_path=ontology_path) + answers_with_ontology = await query_pipeline(questions) + + logger.info("\n--- Generating answers WITHOUT ontology ---\n") + await run_pipeline() + answers_without_ontology = await query_pipeline(questions) + + print_comparison_table(questions, answers_with_ontology, answers_without_ontology) + + await visualize_graph() + + +if __name__ == "__main__": + logger = get_logger() + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(main()) + finally: + loop.run_until_complete(loop.shutdown_asyncgens()) diff --git a/examples/python/ontology_input_example/enriched_medical_ontology_with_classes.owl b/examples/python/ontology_input_example/enriched_medical_ontology_with_classes.owl new file mode 100644 index 000000000..008d2af62 --- /dev/null +++ b/examples/python/ontology_input_example/enriched_medical_ontology_with_classes.owl @@ -0,0 +1,313 @@ + + + + + + A chronic condition that affects how the body processes glucose. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A condition where the force of blood against artery walls is too high. + + + + + + + + + + + + + + + A disease of abnormal cell growth with potential to invade or spread. + + + + + + + + + + + + + + + + + + + + + + + + A cluster of conditions increasing the risk of heart disease and diabetes. + + + + + + + + + + + + + + + + + Disease is a concept used to classify relevant medical terms. + + + + + + + + + + + + + An irregular and often rapid heart rhythm that may cause blood clots. + + + + + + + + + + + + + + + + + + + + + + + + + + A class of diseases that involve the heart or blood vessels. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + PreventiveFactor is a concept used to classify relevant medical terms. + + + + Symptom is a concept used to classify relevant medical terms. + + + + + + + + + + + + + + + + + + + + + + + + + A condition in which the heart is unable to pump sufficiently. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RiskFactor is a concept used to classify relevant medical terms. + +