test: Add docs tests. Initial commit, still WIP.

This commit is contained in:
Andrej Milicevic 2025-11-03 15:31:09 +01:00
parent 8d7c4d5384
commit 90d10e6f9a
11 changed files with 341 additions and 0 deletions

18
.github/workflows/docs_tests.yml vendored Normal file
View file

@ -0,0 +1,18 @@
# Docs test suite workflow.
name: Docs Test Suite
# Request read-only access to repository contents.
permissions:
contents: read
# Triggers: release events and manual runs (workflow_dispatch).
on:
release:
workflow_dispatch:
# Runs are grouped per workflow + ref; a newer run cancels the one in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
# Environment variables set at workflow level.
env:
RUNTIME__LOG_LEVEL: ERROR
ENV: 'dev'
jobs:

View file

@ -0,0 +1,38 @@
import asyncio
from typing import Any
from pydantic import SkipValidation
import cognee
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.engine.models.Edge import Edge
from cognee.tasks.storage import add_data_points
class Person(DataPoint):
    # Name of the person; listed in `metadata` below so it is indexed for search.
    name: str

    # Validation is skipped to keep forward refs / mixed values simple:
    # the field may hold a single Person or a list[Person].
    knows: SkipValidation[Any] = None

    # Recommended: declare which fields should be indexed for search.
    metadata: dict = dict(index_fields=["name"])
async def main():
    """Create three Person nodes, link them via `knows`, and store them.

    Prunes existing cognee data and system metadata first, then passes the
    three data points to `add_data_points`.
    """
    # Start clean (optional in your app)
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    alice = Person(name="Alice")
    bob = Person(name="Bob")
    charlie = Person(name="Charlie")

    # Create relationships - field name becomes edge label
    alice.knows = bob
    # You can also do lists: alice.knows = [bob, charlie]

    # Optional: add weights and custom relationship types
    bob.knows = (Edge(weight=0.9, relationship_type="friend_of"), charlie)

    await add_data_points([alice, bob, charlie])


# Run only when executed as a script, so importing this module (e.g. during
# test collection) does not start the event loop or touch the data stores.
if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,30 @@
import asyncio
import cognee
from cognee.api.v1.search import SearchType
# Extraction instructions for the LLM; passed to cognify() as `custom_prompt`.
custom_prompt = (
    "\n"
    "Extract only people and cities as entities.\n"
    'Connect people to cities with the relationship "lives_in".\n'
    "Ignore all other entities.\n"
)
async def main():
    """Ingest three sentences, cognify them with the custom prompt, and print a graph answer."""
    sentences = [
        "Alice moved to Paris in 2010, while Bob has always lived in New York.",
        "Andreas was born in Venice, but later settled in Lisbon.",
        "Diana and Tom were born and raised in Helsingy. Diana currently resides in Berlin, while Tom never moved.",
    ]
    await cognee.add(sentences)

    # Graph extraction is steered by the module-level custom prompt.
    await cognee.cognify(custom_prompt=custom_prompt)

    answer = await cognee.search(
        query_text="Where does Alice live?",
        query_type=SearchType.GRAPH_COMPLETION,
    )
    print(answer)


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,53 @@
import asyncio
from typing import Any, Dict, List
from pydantic import BaseModel, SkipValidation
import cognee
from cognee.modules.engine.operations.setup import setup
from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.infrastructure.engine import DataPoint
from cognee.tasks.storage import add_data_points
from cognee.modules.pipelines import Task, run_pipeline
class Person(DataPoint):
    # Name of the person; listed in `metadata` so it is searchable.
    name: str

    # Optional relationships: starts empty and is left for the LLM to populate.
    knows: List["Person"] = []

    # Make names searchable in the vector store
    metadata: Dict[str, Any] = dict(index_fields=["name"])
class People(BaseModel):
    # Wrapper model used as the structured-output target for the LLM call;
    # holds every Person extracted from the input text.
    persons: List[Person]
async def extract_people(text: str) -> List[Person]:
    """Ask the LLM for the people mentioned in *text*.

    The response is requested as a `People` model; its `persons` list is
    returned.
    """
    # Joined with single spaces, this yields exactly the original prompt text.
    instructions = " ".join(
        (
            "Extract people mentioned in the text.",
            "Return as `persons: Person[]` with each Person having `name` and optional `knows` relations.",
            "If the text says someone knows someone set `knows` accordingly.",
            "Only include facts explicitly stated.",
        )
    )
    extracted = await LLMGateway.acreate_structured_output(text, instructions, People)
    return extracted.persons
async def main():
    """Reset cognee, then run a two-task pipeline: extract people and store them."""
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    source_text = "Alice knows Bob."

    pipeline = [
        Task(extract_people),  # text -> list[Person]
        Task(add_data_points),  # list[Person] -> list[Person]
    ]

    # Drain the async iterator; the yielded values are not needed here.
    async for _ in run_pipeline(tasks=pipeline, data=source_text, datasets=["people_demo"]):
        pass


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,13 @@
import asyncio
import cognee
from cognee.api.v1.visualize.visualize import visualize_graph
async def main():
    """Add two sentences, cognify them, and render the graph.

    The visualization is requested at ./graph_after_cognify.html.
    """
    await cognee.add(["Alice knows Bob.", "NLP is a subfield of CS."])
    await cognee.cognify()
    await visualize_graph("./graph_after_cognify.html")


# Run only when executed as a script, so importing this module does not
# trigger ingestion and LLM calls as a side effect.
if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,31 @@
import asyncio
from pydantic import BaseModel
from typing import List
from cognee.infrastructure.llm.LLMGateway import LLMGateway
class MiniEntity(BaseModel):
    # One extracted entity: its name and a type label (e.g. 'Organization').
    name: str
    type: str
class MiniGraph(BaseModel):
    # Structured-output target for the LLM call: the list of extracted entities.
    nodes: List[MiniEntity]
async def main():
    """Request a `MiniGraph` of entities for one sentence and print it."""
    # Joined with a single space, this yields exactly the original prompt text.
    instructions = " ".join(
        (
            "Extract entities as nodes with name and type.",
            "Use concise, literal values present in the text.",
        )
    )
    sentence = "Apple develops iPhone; Audi produces the R8."

    graph = await LLMGateway.acreate_structured_output(sentence, instructions, MiniGraph)
    print(graph)
    # Example output:
    # MiniGraph(nodes=[MiniEntity(name='Apple', type='Organization'), ...])


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,29 @@
import asyncio
import cognee
from cognee import SearchType
async def main():
    """Build a graph from two chat snippets, enrich it with memify, and query coding rules."""
    # 1) Add two short chats and build a graph
    await cognee.add(
        [
            "We follow PEP8. Add type hints and docstrings.",
            "Releases should not be on Friday. Susan must review PRs.",
        ],
        dataset_name="rules_demo",
    )
    await cognee.cognify(datasets=["rules_demo"])  # builds graph

    # 2) Enrich the graph (uses default memify tasks)
    await cognee.memify(dataset="rules_demo")

    # 3) Query the new coding rules
    rules = await cognee.search(
        query_type=SearchType.CODING_RULES,
        query_text="List coding rules",
        node_name=["coding_agent_rules"],
    )
    print("Rules:", rules)


# Run only when executed as a script, so importing this module does not
# trigger ingestion and LLM calls as a side effect.
if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,30 @@
import asyncio
import cognee
async def main():
    """Add two sample sentences and cognify them with an ontology resolver built from an OWL file."""
    sample_texts = ["Audi produces the R8 and e-tron.", "Apple develops iPhone and MacBook."]

    await cognee.add(sample_texts)
    # or: await cognee.add("/path/to/folder/of/files")

    import os
    from cognee.modules.ontology.ontology_config import Config
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver

    # The ontology file is located relative to this script's own directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    ontology_path = os.path.join(script_dir, "ontology_input_example/basic_ontology.owl")

    # Create full config structure manually
    resolver = RDFLibOntologyResolver(ontology_file=ontology_path)
    config: Config = {"ontology_config": {"ontology_resolver": resolver}}

    await cognee.cognify(config=config)


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,25 @@
import asyncio
import cognee
async def main():
    """Add data from S3 paths (single file, prefix, mixed list) and cognify it."""
    s3_file = "s3://cognee-temp/2024-11-04.md"
    s3_prefix = "s3://cognee-temp"

    # Single file
    await cognee.add(s3_file)

    # Folder/prefix (recursively expands)
    await cognee.add(s3_prefix)

    # Mixed list
    mixed_inputs = [s3_file, "Some inline text to ingest"]
    await cognee.add(mixed_inputs)

    # Process the data
    await cognee.cognify()


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,17 @@
import asyncio
import cognee
async def main():
    """Reset cognee, ingest one sentence, build the graph, and print search answers.

    The data stores are pruned first, so the sample text has to be added and
    cognified before searching; otherwise the search runs against the stores
    that were just pruned.
    """
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    text = "First rule of coding: Do not talk about coding."

    # Search needs content in the graph: ingest the text and cognify it first.
    await cognee.add(text)
    await cognee.cognify()

    answers = await cognee.search(query_text="What are the main themes in my data?")
    for answer in answers:
        print(answer)


# Run only when executed as a script, so importing this module does not
# prune data or trigger LLM calls as a side effect.
if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,57 @@
import asyncio
import cognee
async def main():
    """Ingest a short timeline with temporal cognify and check that temporal searches return results."""
    text = (
        "\n"
        "In 1998 the project launched. In 2001 version 1.0 shipped. In 2004 the team merged\n"
        "with another group. In 2010 support for v1 ended.\n"
    )

    await cognee.add(text, dataset_name="timeline_demo")
    await cognee.cognify(datasets=["timeline_demo"], temporal_cognify=True)

    from cognee.api.v1.search import SearchType

    async def temporal_search(question, **extra):
        # Shared call shape for every query below: TEMPORAL search with top_k=10.
        return await cognee.search(
            query_type=SearchType.TEMPORAL, query_text=question, top_k=10, **extra
        )

    # One assert per query so a failure points at the specific question.

    # Before / after queries
    assert (await temporal_search("What happened before 2000?")) != []
    assert (await temporal_search("What happened after 2010?")) != []

    # Between queries
    assert (await temporal_search("Events between 2001 and 2004")) != []

    # Scoped descriptions
    assert (await temporal_search("Key project milestones between 1998 and 2010")) != []

    # Same kind of query, restricted to the demo dataset
    assert (
        await temporal_search("What happened after 2004?", datasets=["timeline_demo"])
    ) != []


if __name__ == "__main__":
    asyncio.run(main())