from typing import Type

from pydantic import BaseModel

from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.get_llm_client import get_llm_client


async def extract_categories(content: str, response_model: Type[BaseModel]) -> dict:
    """Extract topology information from *content* via the configured LLM.

    Parameters
    ----------
    content : str
        Raw text to analyse.
    response_model : Type[BaseModel]
        Pydantic model describing the structured output expected from the LLM.

    Returns
    -------
    dict
        The LLM's structured response dumped to a plain dictionary.
    """
    llm_client = get_llm_client()

    # System prompt is stored alongside the other prompt templates.
    system_prompt = read_query_prompt("extract_topology.txt")

    llm_output = await llm_client.acreate_structured_output(
        content, system_prompt, response_model
    )

    return llm_output.model_dump()
import logging

from cognee.infrastructure import infrastructure_config
from cognee.modules.topology.extraction.extract_topology import extract_categories

logger = logging.getLogger(__name__)


async def infer_data_topology(content: str, graph_topology=None):
    """Infer a data topology from *content*.

    Parameters
    ----------
    content : str
        Text (e.g. a serialized file structure) to extract a topology from.
    graph_topology : optional
        Response model to use; defaults to the ``graph_topology`` entry of the
        infrastructure configuration when not provided.

    Returns
    -------
    dict
        The extracted topology, as returned by ``extract_categories``.

    Raises
    ------
    Exception
        Re-raises whatever the extraction step raises, after logging it.
    """
    if graph_topology is None:
        graph_topology = infrastructure_config.get_config()["graph_topology"]

    try:
        return await extract_categories(content, graph_topology)
    except Exception as error:
        # exc_info=True attaches the full traceback to the log record.
        logger.error("Error extracting topology from content: %s", error, exc_info=True)
        # Bare `raise` re-raises the active exception without rebinding it.
        raise
class Relationship(BaseModel):
    """A typed edge between two graph nodes, with optional endpoints/properties."""
    type: str
    source: Optional[str] = None
    target: Optional[str] = None
    properties: Optional[Dict[str, Any]] = None


class Document(BaseModel):
    """A single document tracked in the repository graph."""
    id: str
    title: str
    description: Optional[str] = None
    default_relationship: Relationship = Field(default_factory=lambda: Relationship(type="belongs_to"))


class DirectoryModel(BaseModel):
    """A directory node; may contain documents and nested subdirectories."""
    name: str
    path: str
    summary: str
    documents: List[Document] = []
    subdirectories: List["DirectoryModel"] = []
    default_relationship: Relationship = Field(default_factory=lambda: Relationship(type="belongs_to"))


# Resolve the self-referential "DirectoryModel" forward reference.
# NOTE(review): deprecated in pydantic v2 in favour of model_rebuild() — the
# codebase already uses v2 APIs (model_dump), so consider switching.
DirectoryModel.update_forward_refs()


class RepositoryMetadata(BaseModel):
    """Top-level metadata describing a repository."""
    name: str
    summary: str
    owner: str
    description: Optional[str] = None
    directories: List[DirectoryModel] = []
    documents: List[Document] = []
    default_relationship: Relationship = Field(default_factory=lambda: Relationship(type="belongs_to"))


class GitHubRepositoryModel(BaseModel):
    """A whole repository: its metadata plus the root directory tree."""
    metadata: RepositoryMetadata
    root_directory: DirectoryModel


class TopologyEngine:
    """Infers, loads, or extrapolates a topology model for a dataset."""

    def __init__(self):
        # Registry of named topology models; populated externally for now.
        self.models: Dict[str, Type[BaseModel]] = {}

    async def infer(self, repository: str):
        """Infer the topology of *repository* under the configured data root.

        Walks the directory tree, builds a flat ``{relative__path: (str, ...)}``
        description of the files, and asks the LLM to fit it into
        ``GitHubRepositoryModel``.

        Raises
        ------
        FileNotFoundError
            If the repository directory does not exist.
        """
        # NOTE(review): the module header imports `infer_data_topology` via a
        # bare path — should be `cognee.modules.topology.infer_data_topology`;
        # confirm before shipping.
        root = infrastructure_config.get_config()["data_root_directory"]
        # os.path.join is portable, unlike manual "/" concatenation.
        path = os.path.join(root, str(repository))
        if not os.path.exists(path):
            raise FileNotFoundError(f"No such directory: {path}")

        file_structure = {}
        for filename in glob.glob(f"{path}/**", recursive=True):
            if os.path.isfile(filename):
                # Flatten the relative path into a single key ("a/b.py" -> "a__b.py").
                key = os.path.relpath(filename, start=path).replace(os.path.sep, "__")
                # File content is modelled as a plain string for simplicity.
                file_structure[key] = (str, ...)

        return await infer_data_topology(str(file_structure), GitHubRepositoryModel)

    def load(self, model_name: str):
        """Return a previously registered model, or None if unknown."""
        return self.models.get(model_name)

    def extrapolate(self, model_name: str):
        """Placeholder: implementation-specific extension point."""
        pass


if __name__ == "__main__":
    import asyncio

    data_directory_path = os.path.abspath("../../../.data")
    config.data_root_directory(data_directory_path)
    cognee_directory_path = os.path.abspath("../.cognee_system")
    config.system_root_directory(cognee_directory_path)

    async def main():
        engine = TopologyEngine()
        result = await engine.infer("example")
        print(result)

    asyncio.run(main())