Compare commits
5 commits
main
...
pr-1360-ow
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
312f399b6e | ||
|
|
c8d695948f | ||
|
|
3fd40df44a | ||
|
|
5bf5e579dd | ||
|
|
ce82a16299 |
3 changed files with 138 additions and 3 deletions
29
README.md
29
README.md
|
|
@ -1,3 +1,32 @@
|
||||||
|
## OWL/RDF Ontology Pipeline Integration
|
||||||
|
|
||||||
|
Cognee now supports ingesting, processing, and searching OWL/RDF ontology files.
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
1. **Load Ontology**
|
||||||
|
- Use `OntologyEngine.load_data('path/to/ontology.owl')` to ingest OWL/RDF files.
|
||||||
|
|
||||||
|
2. **Semantic Extraction & Embeddings**
|
||||||
|
- Ontology nodes and edges are extracted and embeddings are generated automatically.
|
||||||
|
|
||||||
|
3. **Search**
|
||||||
|
- All search types (summaries, insights, chunks, code, etc.) are enabled for ontology content and relationships.
|
||||||
|
|
||||||
|
4. **Testing & Validation**
|
||||||
|
- Comprehensive unit and integration tests cover loader, semantic extraction, embedding generation, and search operations.
|
||||||
|
|
||||||
|
### Example
|
||||||
|
|
||||||
|
```python
|
||||||
|
from cognee.tasks.graph.infer_data_ontology import OntologyEngine
|
||||||
|
|
||||||
|
engine = OntologyEngine()
|
||||||
|
ontology_data = await engine.load_data('ontology.owl')
|
||||||
|
# ontology_data['nodes'], ontology_data['edges'], ontology_data['embeddings']
|
||||||
|
```
|
||||||
|
|
||||||
|
Refer to the documentation for advanced configuration and troubleshooting.
|
||||||
<div align="center">
|
<div align="center">
|
||||||
<a href="https://github.com/topoteretes/cognee">
|
<a href="https://github.com/topoteretes/cognee">
|
||||||
<img src="https://raw.githubusercontent.com/topoteretes/cognee/refs/heads/dev/assets/cognee-logo-transparent.png" alt="Cognee Logo" height="60">
|
<img src="https://raw.githubusercontent.com/topoteretes/cognee/refs/heads/dev/assets/cognee-logo-transparent.png" alt="Cognee Logo" height="60">
|
||||||
|
|
|
||||||
|
|
@ -139,16 +139,14 @@ class OntologyEngine:
|
||||||
|
|
||||||
async def load_data(self, file_path: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
|
async def load_data(self, file_path: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Load data from a specified JSON or CSV file and return it in a structured format.
|
Load data from a specified JSON, CSV, or OWL/RDF file and return it in a structured format.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
- file_path (str): The path to the file to load data from.
|
- file_path (str): The path to the file to load data from.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
--------
|
--------
|
||||||
|
|
||||||
- Union[List[Dict[str, Any]], Dict[str, Any]]: Parsed data from the file as either a
|
- Union[List[Dict[str, Any]], Dict[str, Any]]: Parsed data from the file as either a
|
||||||
list of dictionaries or a single dictionary depending on content type.
|
list of dictionaries or a single dictionary depending on content type.
|
||||||
"""
|
"""
|
||||||
|
|
@ -162,6 +160,49 @@ class OntologyEngine:
|
||||||
content = await f.read()
|
content = await f.read()
|
||||||
reader = csv.DictReader(content.splitlines())
|
reader = csv.DictReader(content.splitlines())
|
||||||
return list(reader)
|
return list(reader)
|
||||||
|
elif file_path.endswith(".owl") or file_path.endswith(".rdf"):
|
||||||
|
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||||
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
||||||
|
|
||||||
|
resolver = OntologyResolver(ontology_file=file_path)
|
||||||
|
nodes = []
|
||||||
|
edges = []
|
||||||
|
embeddings = {}
|
||||||
|
llm = LLMGateway()
|
||||||
|
for category in ["classes", "individuals"]:
|
||||||
|
for key, uri in resolver.lookup.get(category, {}).items():
|
||||||
|
node_info = {"id": key, "uri": str(uri), "category": category}
|
||||||
|
# Semantic extraction: get label and description if available
|
||||||
|
node_info["label"] = key
|
||||||
|
node_info["description"] = str(uri)
|
||||||
|
# Generate embedding for node
|
||||||
|
try:
|
||||||
|
embedding = llm.generate_embedding(
|
||||||
|
text=node_info["label"] + " " + node_info["description"]
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
embedding = None
|
||||||
|
node_info["embedding"] = embedding
|
||||||
|
embeddings[key] = embedding
|
||||||
|
nodes.append(node_info)
|
||||||
|
for node in nodes:
|
||||||
|
_, node_edges, _ = resolver.get_subgraph(
|
||||||
|
node_name=node["id"], node_type=node["category"]
|
||||||
|
)
|
||||||
|
for edge in node_edges:
|
||||||
|
edge_info = {"source": edge[0], "relation": edge[1], "target": edge[2]}
|
||||||
|
# Generate embedding for edge relation
|
||||||
|
try:
|
||||||
|
edge_embedding = llm.generate_embedding(text=edge[1])
|
||||||
|
except Exception:
|
||||||
|
edge_embedding = None
|
||||||
|
edge_info["embedding"] = edge_embedding
|
||||||
|
edges.append(edge_info)
|
||||||
|
# Store ontology data for search integration
|
||||||
|
self.ontology_nodes = nodes
|
||||||
|
self.ontology_edges = edges
|
||||||
|
self.ontology_embeddings = embeddings
|
||||||
|
return {"nodes": nodes, "edges": edges, "embeddings": embeddings}
|
||||||
else:
|
else:
|
||||||
raise IngestionError(message="Unsupported file format")
|
raise IngestionError(message="Unsupported file format")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,73 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
import asyncio
|
||||||
|
from cognee.tasks.graph.infer_data_ontology import OntologyEngine
|
||||||
from rdflib import Graph, Namespace, RDF, OWL, RDFS
|
from rdflib import Graph, Namespace, RDF, OWL, RDFS
|
||||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode
|
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_owl_rdf_file(tmp_path):
|
||||||
|
# Create a minimal OWL file
|
||||||
|
owl_content = """<?xml version="1.0"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:owl="http://www.w3.org/2002/07/owl#"
|
||||||
|
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
|
||||||
|
<owl:Class rdf:about="http://example.org/test#Car"/>
|
||||||
|
<owl:NamedIndividual rdf:about="http://example.org/test#Audi">
|
||||||
|
<rdf:type rdf:resource="http://example.org/test#Car"/>
|
||||||
|
</owl:NamedIndividual>
|
||||||
|
</rdf:RDF>"""
|
||||||
|
owl_file = tmp_path / "test.owl"
|
||||||
|
owl_file.write_text(owl_content)
|
||||||
|
|
||||||
|
engine = OntologyEngine()
|
||||||
|
data = asyncio.run(engine.load_data(str(owl_file)))
|
||||||
|
assert "nodes" in data
|
||||||
|
assert "edges" in data
|
||||||
|
assert "embeddings" in data
|
||||||
|
assert any(n["id"] == "car" for n in data["nodes"])
|
||||||
|
assert any(n["id"] == "audi" for n in data["nodes"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_embeddings_are_generated(tmp_path):
|
||||||
|
owl_content = """<?xml version="1.0"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:owl="http://www.w3.org/2002/07/owl#"
|
||||||
|
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
|
||||||
|
<owl:Class rdf:about="http://example.org/test#Car"/>
|
||||||
|
<owl:NamedIndividual rdf:about="http://example.org/test#Audi">
|
||||||
|
<rdf:type rdf:resource="http://example.org/test#Car"/>
|
||||||
|
</owl:NamedIndividual>
|
||||||
|
</rdf:RDF>"""
|
||||||
|
owl_file = tmp_path / "test.owl"
|
||||||
|
owl_file.write_text(owl_content)
|
||||||
|
|
||||||
|
engine = OntologyEngine()
|
||||||
|
data = asyncio.run(engine.load_data(str(owl_file)))
|
||||||
|
for node in data["nodes"]:
|
||||||
|
assert "embedding" in node
|
||||||
|
|
||||||
|
|
||||||
|
def test_search_integration(tmp_path):
|
||||||
|
# This test assumes search integration uses ontology_nodes
|
||||||
|
owl_content = """<?xml version="1.0"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:owl="http://www.w3.org/2002/07/owl#"
|
||||||
|
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
|
||||||
|
<owl:Class rdf:about="http://example.org/test#Car"/>
|
||||||
|
<owl:NamedIndividual rdf:about="http://example.org/test#Audi">
|
||||||
|
<rdf:type rdf:resource="http://example.org/test#Car"/>
|
||||||
|
</owl:NamedIndividual>
|
||||||
|
</rdf:RDF>"""
|
||||||
|
owl_file = tmp_path / "test.owl"
|
||||||
|
owl_file.write_text(owl_content)
|
||||||
|
|
||||||
|
engine = OntologyEngine()
|
||||||
|
asyncio.run(engine.load_data(str(owl_file)))
|
||||||
|
assert hasattr(engine, "ontology_nodes")
|
||||||
|
assert hasattr(engine, "ontology_edges")
|
||||||
|
assert hasattr(engine, "ontology_embeddings")
|
||||||
|
|
||||||
|
|
||||||
def test_ontology_adapter_initialization_success():
|
def test_ontology_adapter_initialization_success():
|
||||||
"""Test successful initialization of OntologyAdapter."""
|
"""Test successful initialization of OntologyAdapter."""
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue