diff --git a/.github/workflows/docs_tests.yml b/.github/workflows/docs_tests.yml
new file mode 100644
index 000000000..b3c538668
--- /dev/null
+++ b/.github/workflows/docs_tests.yml
@@ -0,0 +1,18 @@
+name: Docs Test Suite
+permissions:
+  contents: read
+
+on:
+  release:
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  RUNTIME__LOG_LEVEL: ERROR
+  ENV: 'dev'
+
+jobs:
+
diff --git a/cognee/tests/docs/guides/custom_data_models.py b/cognee/tests/docs/guides/custom_data_models.py
new file mode 100644
index 000000000..0eb314227
--- /dev/null
+++ b/cognee/tests/docs/guides/custom_data_models.py
@@ -0,0 +1,38 @@
+import asyncio
+from typing import Any
+from pydantic import SkipValidation
+
+import cognee
+from cognee.infrastructure.engine import DataPoint
+from cognee.infrastructure.engine.models.Edge import Edge
+from cognee.tasks.storage import add_data_points
+
+
+class Person(DataPoint):
+    name: str
+    # Keep it simple for forward refs / mixed values
+    knows: SkipValidation[Any] = None  # single Person or list[Person]
+    # Recommended: specify which fields to index for search
+    metadata: dict = {"index_fields": ["name"]}
+
+
+async def main():
+    # Start clean (optional in your app)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    alice = Person(name="Alice")
+    bob = Person(name="Bob")
+    charlie = Person(name="Charlie")
+
+    # Create relationships - field name becomes edge label
+    alice.knows = bob
+    # You can also do lists: alice.knows = [bob, charlie]
+
+    # Optional: add weights and custom relationship types
+    bob.knows = (Edge(weight=0.9, relationship_type="friend_of"), charlie)
+
+    await add_data_points([alice, bob, charlie])
+
+
+asyncio.run(main())
diff --git a/cognee/tests/docs/guides/custom_prompts.py b/cognee/tests/docs/guides/custom_prompts.py
new file mode 100644
index 000000000..0d0a55a80
--- /dev/null
+++ b/cognee/tests/docs/guides/custom_prompts.py
@@ -0,0 +1,30 @@
+import asyncio
+import cognee
+from cognee.api.v1.search import SearchType
+
+custom_prompt = """
+Extract only people and cities as entities.
+Connect people to cities with the relationship "lives_in".
+Ignore all other entities.
+"""
+
+
+async def main():
+    await cognee.add(
+        [
+            "Alice moved to Paris in 2010, while Bob has always lived in New York.",
+            "Andreas was born in Venice, but later settled in Lisbon.",
+            "Diana and Tom were born and raised in Helsinki. Diana currently resides in Berlin, while Tom never moved.",
+        ]
+    )
+    await cognee.cognify(custom_prompt=custom_prompt)
+
+    res = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="Where does Alice live?",
+    )
+    print(res)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/cognee/tests/docs/guides/custom_tasks_and_pipelines.py b/cognee/tests/docs/guides/custom_tasks_and_pipelines.py
new file mode 100644
index 000000000..202bb128a
--- /dev/null
+++ b/cognee/tests/docs/guides/custom_tasks_and_pipelines.py
@@ -0,0 +1,53 @@
+import asyncio
+from typing import Any, Dict, List
+from pydantic import BaseModel, SkipValidation
+
+import cognee
+from cognee.modules.engine.operations.setup import setup
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.engine import DataPoint
+from cognee.tasks.storage import add_data_points
+from cognee.modules.pipelines import Task, run_pipeline
+
+
+class Person(DataPoint):
+    name: str
+    # Optional relationships (we'll let the LLM populate this)
+    knows: List["Person"] = []
+    # Make names searchable in the vector store
+    metadata: Dict[str, Any] = {"index_fields": ["name"]}
+
+
+class People(BaseModel):
+    persons: List[Person]
+
+
+async def extract_people(text: str) -> List[Person]:
+    system_prompt = (
+        "Extract people mentioned in the text. "
+        "Return as `persons: Person[]` with each Person having `name` and optional `knows` relations. "
+        "If the text says someone knows someone set `knows` accordingly. "
+        "Only include facts explicitly stated."
+    )
+    people = await LLMGateway.acreate_structured_output(text, system_prompt, People)
+    return people.persons
+
+
+async def main():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+
+    text = "Alice knows Bob."
+
+    tasks = [
+        Task(extract_people),  # input: text -> output: list[Person]
+        Task(add_data_points),  # input: list[Person] -> output: list[Person]
+    ]
+
+    async for _ in run_pipeline(tasks=tasks, data=text, datasets=["people_demo"]):
+        pass
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/cognee/tests/docs/guides/graph_visualization.py b/cognee/tests/docs/guides/graph_visualization.py
new file mode 100644
index 000000000..d463cbb56
--- /dev/null
+++ b/cognee/tests/docs/guides/graph_visualization.py
@@ -0,0 +1,13 @@
+import asyncio
+import cognee
+from cognee.api.v1.visualize.visualize import visualize_graph
+
+
+async def main():
+    await cognee.add(["Alice knows Bob.", "NLP is a subfield of CS."])
+    await cognee.cognify()
+
+    await visualize_graph("./graph_after_cognify.html")
+
+
+asyncio.run(main())
diff --git a/cognee/tests/docs/guides/low_level_llm.py b/cognee/tests/docs/guides/low_level_llm.py
new file mode 100644
index 000000000..454f53f44
--- /dev/null
+++ b/cognee/tests/docs/guides/low_level_llm.py
@@ -0,0 +1,31 @@
+import asyncio
+
+from pydantic import BaseModel
+from typing import List
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+
+
+class MiniEntity(BaseModel):
+    name: str
+    type: str
+
+
+class MiniGraph(BaseModel):
+    nodes: List[MiniEntity]
+
+
+async def main():
+    system_prompt = (
+        "Extract entities as nodes with name and type. "
+        "Use concise, literal values present in the text."
+    )
+
+    text = "Apple develops iPhone; Audi produces the R8."
+
+    result = await LLMGateway.acreate_structured_output(text, system_prompt, MiniGraph)
+    print(result)
+    # MiniGraph(nodes=[MiniEntity(name='Apple', type='Organization'), ...])
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/cognee/tests/docs/guides/memify_quickstart.py b/cognee/tests/docs/guides/memify_quickstart.py
new file mode 100644
index 000000000..040654350
--- /dev/null
+++ b/cognee/tests/docs/guides/memify_quickstart.py
@@ -0,0 +1,29 @@
+import asyncio
+import cognee
+from cognee import SearchType
+
+
+async def main():
+    # 1) Add two short chats and build a graph
+    await cognee.add(
+        [
+            "We follow PEP8. Add type hints and docstrings.",
+            "Releases should not be on Friday. Susan must review PRs.",
+        ],
+        dataset_name="rules_demo",
+    )
+    await cognee.cognify(datasets=["rules_demo"])  # builds graph
+
+    # 2) Enrich the graph (uses default memify tasks)
+    await cognee.memify(dataset="rules_demo")
+
+    # 3) Query the new coding rules
+    rules = await cognee.search(
+        query_type=SearchType.CODING_RULES,
+        query_text="List coding rules",
+        node_name=["coding_agent_rules"],
+    )
+    print("Rules:", rules)
+
+
+asyncio.run(main())
diff --git a/cognee/tests/docs/guides/ontology_quickstart.py b/cognee/tests/docs/guides/ontology_quickstart.py
new file mode 100644
index 000000000..2784dab19
--- /dev/null
+++ b/cognee/tests/docs/guides/ontology_quickstart.py
@@ -0,0 +1,30 @@
+import asyncio
+import cognee
+
+
+async def main():
+    texts = ["Audi produces the R8 and e-tron.", "Apple develops iPhone and MacBook."]
+
+    await cognee.add(texts)
+    # or: await cognee.add("/path/to/folder/of/files")
+
+    import os
+    from cognee.modules.ontology.ontology_config import Config
+    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+
+    ontology_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl"
+    )
+
+    # Create full config structure manually
+    config: Config = {
+        "ontology_config": {
+            "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
+        }
+    }
+
+    await cognee.cognify(config=config)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/cognee/tests/docs/guides/s3_storage.py b/cognee/tests/docs/guides/s3_storage.py
new file mode 100644
index 000000000..1044e05b4
--- /dev/null
+++ b/cognee/tests/docs/guides/s3_storage.py
@@ -0,0 +1,25 @@
+import asyncio
+import cognee
+
+
+async def main():
+    # Single file
+    await cognee.add("s3://cognee-temp/2024-11-04.md")
+
+    # Folder/prefix (recursively expands)
+    await cognee.add("s3://cognee-temp")
+
+    # Mixed list
+    await cognee.add(
+        [
+            "s3://cognee-temp/2024-11-04.md",
+            "Some inline text to ingest",
+        ]
+    )
+
+    # Process the data
+    await cognee.cognify()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/cognee/tests/docs/guides/search_basics.py b/cognee/tests/docs/guides/search_basics.py
new file mode 100644
index 000000000..67d0c938d
--- /dev/null
+++ b/cognee/tests/docs/guides/search_basics.py
@@ -0,0 +1,20 @@
+import asyncio
+import cognee
+
+
+async def main():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    text = "First rule of coding: Do not talk about coding."
+
+    # Add the text and run cognify first so the graph has content to search
+    await cognee.add(text)
+    await cognee.cognify()
+
+    answers = await cognee.search(query_text="What are the main themes in my data?")
+    for answer in answers:
+        print(answer)
+
+
+asyncio.run(main())
diff --git a/cognee/tests/docs/guides/temporal_cognify.py b/cognee/tests/docs/guides/temporal_cognify.py
new file mode 100644
index 000000000..34c1ee33c
--- /dev/null
+++ b/cognee/tests/docs/guides/temporal_cognify.py
@@ -0,0 +1,57 @@
+import asyncio
+import cognee
+
+
+async def main():
+    text = """
+    In 1998 the project launched. In 2001 version 1.0 shipped. In 2004 the team merged
+    with another group. In 2010 support for v1 ended.
+    """
+
+    await cognee.add(text, dataset_name="timeline_demo")
+
+    await cognee.cognify(datasets=["timeline_demo"], temporal_cognify=True)
+
+    from cognee.api.v1.search import SearchType
+
+    # Before / after queries
+    result = await cognee.search(
+        query_type=SearchType.TEMPORAL, query_text="What happened before 2000?", top_k=10
+    )
+
+    assert result != []
+
+    result = await cognee.search(
+        query_type=SearchType.TEMPORAL, query_text="What happened after 2010?", top_k=10
+    )
+
+    assert result != []
+
+    # Between queries
+    result = await cognee.search(
+        query_type=SearchType.TEMPORAL, query_text="Events between 2001 and 2004", top_k=10
+    )
+
+    assert result != []
+
+    # Scoped descriptions
+    result = await cognee.search(
+        query_type=SearchType.TEMPORAL,
+        query_text="Key project milestones between 1998 and 2010",
+        top_k=10,
+    )
+
+    assert result != []
+
+    result = await cognee.search(
+        query_type=SearchType.TEMPORAL,
+        query_text="What happened after 2004?",
+        datasets=["timeline_demo"],
+        top_k=10,
+    )
+
+    assert result != []
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
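
Note on .github/workflows/docs_tests.yml: the hunk above sets the triggers, concurrency group, and environment variables but leaves the jobs: key empty. A minimal sketch of one job that could execute these guide scripts is shown below; the job name, runner, Python version, install step, per-script loop, and the LLM_API_KEY secret name are illustrative assumptions, not part of this diff.

# Hypothetical sketch only -- not taken from the diff above.
# Assumes an editable install is sufficient and each guide runs as a plain script.
jobs:
  docs-guides:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install cognee
        run: pip install -e .
      - name: Run docs guide scripts
        env:
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}  # secret name is an assumption
        run: |
          for f in cognee/tests/docs/guides/*.py; do
            python "$f"
          done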