Fixes to llm calls and general logic updates

This commit is contained in:
Vasilije 2024-03-07 14:06:19 +01:00
parent b530f19e49
commit 06f701eb23
8 changed files with 114 additions and 17 deletions

View file

@ -4,7 +4,13 @@
"cell_type": "code",
"execution_count": null,
"id": "13be50c7-167c-4a03-bd75-53904baa1f8c",
"metadata": {},
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": []
},
@ -348,6 +354,58 @@
"system_prompt = f\"\"\" Classify content based on the following categories: {str(classification)}\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c7108ab-5cef-4530-8bea-3b3f96b5b302",
"metadata": {},
"outputs": [],
"source": [
"cognitive_architecture/modules/cognify/llm/classify_content.py"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "14ef9446-ec16-4657-9f83-a4c1c9ef2eba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/vasa/Projects/cognee\n"
]
}
],
"source": [
"import os\n",
"print(os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f97f11f1-4490-49ea-b193-1f858e72893b",
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'cognitive_architecture.modules.cognify.create_vector_memory'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodules\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcognify\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mllm\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclassify_content\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m content_to_cog_layers\n",
"File \u001b[0;32m~/Projects/cognee/cognitive_architecture/modules/cognify/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcreate_vector_memory\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m create_vector_memory\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.modules.cognify.create_vector_memory'"
]
}
],
"source": [
"from cognitive_architecture.modules.cognify.llm.classify_content import content_to_cog_layers"
]
},
{
"cell_type": "code",
"execution_count": 5,
@ -1778,10 +1836,22 @@
},
{
"cell_type": "code",
"execution_count": 226,
"execution_count": 4,
"id": "599cd4f9-4f8d-4321-83a5-fa153d029115",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'cognitive_architecture.openai_tools'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01masyncio\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopenai_tools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m async_get_embedding_with_backoff\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Ensure your async function is defined in a cell above or within this cell\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# async def run_async_function(text):\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 10\u001b[0m \n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Use asyncio.run() to run the async function\u001b[39;00m\n\u001b[1;32m 12\u001b[0m loop \u001b[38;5;241m=\u001b[39m asyncio\u001b[38;5;241m.\u001b[39mget_event_loop()\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.openai_tools'"
]
}
],
"source": [
"import asyncio\n",
"from cognitive_architecture.openai_tools import async_get_embedding_with_backoff\n",

View file

@ -157,7 +157,7 @@ class OpenAIAdapter(LLMInterface):
"""Wrapper around Embedding.create w/ backoff"""
return openai.embeddings.create(**kwargs)
def get_embedding_with_backoffself(self, text: str, model: str = "text-embedding-ada-002"):
def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
"""To get text embeddings, import/call this function
It specifies defaults + handles rate-limiting
:param text: str
@ -183,7 +183,8 @@ class OpenAIAdapter(LLMInterface):
async def acreate_structured_output(self, text_input: str, system_prompt_path: str, response_model: Type[BaseModel]) -> BaseModel:
"""Generate a response from a user query."""
system_prompt = read_query_prompt(system_prompt_path)
return self.aclient.chat.completions.create(
return await self.aclient.chat.completions.create(
model=self.model,
messages=[
{

View file

@ -1,3 +1,5 @@
You are a classification engine and should classify content. Make sure to use one of the existing classification options and not invent your own.
The possible classifications are:
{
"Natural Language Text": {
"type": "TEXT",

View file

@ -13,8 +13,14 @@ async def content_to_cog_layers(text_input: str,system_prompt_path:str, response
return await llm_client.acreate_structured_output(text_input,system_prompt_path, response_model)
# Your async function definitions and other code here...
if __name__ == "__main__":
import asyncio
asyncio.run(content_to_cog_layers("""Russia summons US ambassador in Moscow and says it will expel diplomats who meddle in its internal affairs
The Russian foreign ministry said on Thursday it had summoned the US ambassador in Moscow and warned her against attempts to interfere in the internal affairs of the Russian Federation, reports Reuters.
content_to_cog_layers("test", "test", ContentPrediction)
Ahead of a March presidential election, it said in a statement that such behaviour would be firmly and resolutely suppressed, up to and including the expulsion as persona non grata of US embassy staff involved in such actions.""", "classify_content.txt", ContentPrediction))

View file

@ -134,31 +134,31 @@ class ContentType(BaseModel):
type: str
class TextContent(ContentType):
type = "TEXT"
type:str = "TEXT"
subclass: List[TextSubclass]
class AudioContent(ContentType):
type = "AUDIO"
type:str = "AUDIO"
subclass: List[AudioSubclass]
class ImageContent(ContentType):
type = "IMAGE"
type:str = "IMAGE"
subclass: List[ImageSubclass]
class VideoContent(ContentType):
type = "VIDEO"
type:str = "VIDEO"
subclass: List[VideoSubclass]
class MultimediaContent(ContentType):
type = "MULTIMEDIA"
type:str = "MULTIMEDIA"
subclass: List[MultimediaSubclass]
class Model3DContent(ContentType):
type = "3D_MODEL"
type:str = "3D_MODEL"
subclass: List[Model3DSubclass]
class ProceduralContent(ContentType):
type = "PROCEDURAL"
type:str = "PROCEDURAL"
subclass: List[ProceduralSubclass]
class ContentPrediction(BaseModel):

View file

@ -287,7 +287,13 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
def read_query_prompt(filename: str) -> str:
"""Read a query prompt from a file."""
file_path = Path(filename)
script_directory = Path(__file__).parent
# Set the base directory relative to the script's directory
base_directory = script_directory.parent / "cognitive_architecture/infrastructure/llm/prompts"
# Construct the full file path
file_path = base_directory / filename
try:
return file_path.read_text()
except FileNotFoundError:

13
poetry.lock generated
View file

@ -2410,6 +2410,17 @@ typing-extensions = ">=4.7,<5"
[package.extras]
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
[[package]]
name = "overrides"
version = "7.7.0"
description = "A decorator to automatically detect mismatch when overriding a method."
optional = false
python-versions = ">=3.6"
files = [
{file = "overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49"},
{file = "overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a"},
]
[[package]]
name = "packaging"
version = "23.2"
@ -4234,4 +4245,4 @@ weaviate = []
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "3d583c04bdb28b91b12a9544ddca78e322abb21e980e32c3070ff7d0ac82acc6"
content-hash = "39962c40053f064940d94532130d182d5a0a0526c915c39a0c48f98f21321630"

View file

@ -40,6 +40,7 @@ pymupdf = "^1.23.25"
pandas = "^2.2.1"
greenlet = "^3.0.3"
ruff = "^0.2.2"
overrides = "^7.7.0"
[tool.poetry.extras]
dbt = ["dbt-core", "dbt-redshift", "dbt-bigquery", "dbt-duckdb", "dbt-snowflake", "dbt-athena-community", "dbt-databricks"]
@ -66,7 +67,7 @@ databricks = ["databricks-sql-connector"]
lancedb = ["lancedb"]
pinecone = ["pinecone-client"]
neo4j = ["neo4j", "py2neo"]
notebook =[ "ipykernel", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
notebook =[ "ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"