From 06f701eb235bdbeb6cf7ff94a8179512cdc55299 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Thu, 7 Mar 2024 14:06:19 +0100 Subject: [PATCH] Fixes to llm calls and general logic updates --- Demo_graph.ipynb | 76 ++++++++++++++++++- .../infrastructure/llm/openai/adapter.py | 5 +- .../llm/prompts/classify_content.txt | 2 + .../modules/cognify/llm/classify_content.py | 10 ++- cognitive_architecture/shared/data_models.py | 14 ++-- cognitive_architecture/utils.py | 8 +- poetry.lock | 13 +++- pyproject.toml | 3 +- 8 files changed, 114 insertions(+), 17 deletions(-) diff --git a/Demo_graph.ipynb b/Demo_graph.ipynb index e626ef4db..bca5e9b69 100644 --- a/Demo_graph.ipynb +++ b/Demo_graph.ipynb @@ -4,7 +4,13 @@ "cell_type": "code", "execution_count": null, "id": "13be50c7-167c-4a03-bd75-53904baa1f8c", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [] }, @@ -348,6 +354,58 @@ "system_prompt = f\"\"\" Classify content based on the following categories: {str(classification)}\"\"\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c7108ab-5cef-4530-8bea-3b3f96b5b302", + "metadata": {}, + "outputs": [], + "source": [ + "cognitive_architecture/modules/cognify/llm/classify_content.py" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "14ef9446-ec16-4657-9f83-a4c1c9ef2eba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/vasa/Projects/cognee\n" + ] + } + ], + "source": [ + "import os\n", + "print(os.getcwd())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f97f11f1-4490-49ea-b193-1f858e72893b", + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'cognitive_architecture.modules.cognify.create_vector_memory'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodules\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcognify\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mllm\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclassify_content\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m content_to_cog_layers\n", + "File \u001b[0;32m~/Projects/cognee/cognitive_architecture/modules/cognify/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcreate_vector_memory\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m create_vector_memory\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.modules.cognify.create_vector_memory'" + ] + } + ], + "source": [ + "from cognitive_architecture.modules.cognify.llm.classify_content import content_to_cog_layers" + ] + }, { "cell_type": "code", "execution_count": 5, @@ -1778,10 +1836,22 @@ }, { "cell_type": "code", - "execution_count": 226, + "execution_count": 4, "id": "599cd4f9-4f8d-4321-83a5-fa153d029115", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 
'cognitive_architecture.openai_tools'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01masyncio\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopenai_tools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m async_get_embedding_with_backoff\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Ensure your async function is defined in a cell above or within this cell\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# async def run_async_function(text):\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 10\u001b[0m \n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Use asyncio.run() to run the async function\u001b[39;00m\n\u001b[1;32m 12\u001b[0m loop \u001b[38;5;241m=\u001b[39m asyncio\u001b[38;5;241m.\u001b[39mget_event_loop()\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.openai_tools'" + ] + } + ], "source": [ "import asyncio\n", "from cognitive_architecture.openai_tools import async_get_embedding_with_backoff\n", diff --git a/cognitive_architecture/infrastructure/llm/openai/adapter.py b/cognitive_architecture/infrastructure/llm/openai/adapter.py index 02c71f51c..51d4ccdee 100644 --- a/cognitive_architecture/infrastructure/llm/openai/adapter.py +++ b/cognitive_architecture/infrastructure/llm/openai/adapter.py @@ -157,7 +157,7 @@ class OpenAIAdapter(LLMInterface): """Wrapper around Embedding.create w/ backoff""" return openai.embeddings.create(**kwargs) - def get_embedding_with_backoffself(self, text: str, model: str = "text-embedding-ada-002"): + def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"): """To get text embeddings, import/call this function It specifies defaults + handles rate-limiting :param text: str @@ -183,7 +183,8 @@ class OpenAIAdapter(LLMInterface): async def acreate_structured_output(self, text_input: str, system_prompt_path: str, response_model: Type[BaseModel]) -> BaseModel: """Generate a response from a user query.""" system_prompt = read_query_prompt(system_prompt_path) - return self.aclient.chat.completions.create( + + return await self.aclient.chat.completions.create( model=self.model, messages=[ { diff --git a/cognitive_architecture/infrastructure/llm/prompts/classify_content.txt b/cognitive_architecture/infrastructure/llm/prompts/classify_content.txt index 5f6e8038d..5a29d9319 100644 --- a/cognitive_architecture/infrastructure/llm/prompts/classify_content.txt +++ b/cognitive_architecture/infrastructure/llm/prompts/classify_content.txt @@ -1,3 +1,5 @@ +You are a classification engine and should classify content. Make sure to use one of the existing classification options nad not invent your own. 
+The possible classifications are:
 {
     "Natural Language Text": {
         "type": "TEXT",
diff --git a/cognitive_architecture/modules/cognify/llm/classify_content.py b/cognitive_architecture/modules/cognify/llm/classify_content.py
index 8b9a30eb1..a129d8abf 100644
--- a/cognitive_architecture/modules/cognify/llm/classify_content.py
+++ b/cognitive_architecture/modules/cognify/llm/classify_content.py
@@ -13,8 +13,14 @@ async def content_to_cog_layers(text_input: str,system_prompt_path:str, response
     return await llm_client.acreate_structured_output(text_input,system_prompt_path, response_model)
+
+
+# Your async function definitions and other code here...
+
 if __name__ == "__main__":
+    import asyncio
+    asyncio.run(content_to_cog_layers("""Russia summons US ambassador in Moscow and says it will expel diplomats who meddle in its internal affairs
+The Russian foreign ministry said on Thursday it had summoned the US ambassador in Moscow and warned her against “attempts to interfere in the internal affairs of the Russian Federation”, reports Reuters.
 
-    content_to_cog_layers("test", "test", ContentPrediction)
-
+Ahead of a March presidential election, it said in a statement that such behaviour would be “firmly and resolutely suppressed, up to and including the expulsion as ‘persona non grata’ of US embassy staff involved in such actions”.""", "classify_content.txt", ContentPrediction))
diff --git a/cognitive_architecture/shared/data_models.py b/cognitive_architecture/shared/data_models.py
index 2cfc7e92e..99b668cb3 100644
--- a/cognitive_architecture/shared/data_models.py
+++ b/cognitive_architecture/shared/data_models.py
@@ -134,31 +134,31 @@ class ContentType(BaseModel):
     type: str
 
 class TextContent(ContentType):
-    type = "TEXT"
+    type: str = "TEXT"
     subclass: List[TextSubclass]
 
 class AudioContent(ContentType):
-    type = "AUDIO"
+    type: str = "AUDIO"
     subclass: List[AudioSubclass]
 
 class ImageContent(ContentType):
-    type = "IMAGE"
+    type: str = "IMAGE"
     subclass: List[ImageSubclass]
 
 class VideoContent(ContentType):
-    type = "VIDEO"
+    type: str = "VIDEO"
     subclass: List[VideoSubclass]
 
 class MultimediaContent(ContentType):
-    type = "MULTIMEDIA"
+    type: str = "MULTIMEDIA"
     subclass: List[MultimediaSubclass]
 
 class Model3DContent(ContentType):
-    type = "3D_MODEL"
+    type: str = "3D_MODEL"
     subclass: List[Model3DSubclass]
 
 class ProceduralContent(ContentType):
-    type = "PROCEDURAL"
+    type: str = "PROCEDURAL"
     subclass: List[ProceduralSubclass]
 
 class ContentPrediction(BaseModel):
diff --git a/cognitive_architecture/utils.py b/cognitive_architecture/utils.py
index 668d678a1..32fd7d230 100644
--- a/cognitive_architecture/utils.py
+++ b/cognitive_architecture/utils.py
@@ -287,7 +287,13 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
 
 def read_query_prompt(filename: str) -> str:
     """Read a query prompt from a file."""
-    file_path = Path(filename)
+    script_directory = Path(__file__).parent
+
+    # Set the base directory relative to the script's directory
+    base_directory = script_directory.parent / "cognitive_architecture/infrastructure/llm/prompts"
+
+    # Construct the full file path
+    file_path = base_directory / filename
     try:
         return file_path.read_text()
     except FileNotFoundError:
diff --git a/poetry.lock b/poetry.lock
index 037d580b0..a7d575402 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2410,6 +2410,17 @@ typing-extensions = ">=4.7,<5"
 [package.extras]
 datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
 
+[[package]]
+name = "overrides"
+version = "7.7.0"
+description = "A decorator to automatically detect mismatch when overriding a method."
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49"},
+    {file = "overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a"},
+]
+
 [[package]]
 name = "packaging"
 version = "23.2"
@@ -4234,4 +4245,4 @@ weaviate = []
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "3d583c04bdb28b91b12a9544ddca78e322abb21e980e32c3070ff7d0ac82acc6"
+content-hash = "39962c40053f064940d94532130d182d5a0a0526c915c39a0c48f98f21321630"
diff --git a/pyproject.toml b/pyproject.toml
index 3cd54583b..ae9f75648 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ pymupdf = "^1.23.25"
 pandas = "^2.2.1"
 greenlet = "^3.0.3"
 ruff = "^0.2.2"
+overrides = "^7.7.0"
 
 [tool.poetry.extras]
 dbt = ["dbt-core", "dbt-redshift", "dbt-bigquery", "dbt-duckdb", "dbt-snowflake", "dbt-athena-community", "dbt-databricks"]
@@ -66,7 +67,7 @@ databricks = ["databricks-sql-connector"]
 lancedb = ["lancedb"]
 pinecone = ["pinecone-client"]
 neo4j = ["neo4j", "py2neo"]
-notebook =[ "ipykernel", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
+notebook =[ "ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.4.0"
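
A note on the central fix in adapter.py: with the v1 OpenAI SDK, `AsyncOpenAI.chat.completions.create` returns an awaitable, so the previous `return self.aclient.chat.completions.create(...)` handed callers a bare coroutine rather than a completion. Below is a minimal standalone sketch of the corrected call pattern; the model name, prompt, and `classify` helper are illustrative and not taken from the patch.

```python
import asyncio

from openai import AsyncOpenAI

aclient = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment


async def classify(text: str) -> str:
    # The `await` is the whole point: without it, this expression is a
    # coroutine object rather than a response, which is the bug fixed in
    # acreate_structured_output in the diff above.
    response = await aclient.chat.completions.create(
        model="gpt-3.5-turbo",  # illustrative model choice
        messages=[
            {"role": "system", "content": "Classify the content."},
            {"role": "user", "content": text},
        ],
    )
    return response.choices[0].message.content


if __name__ == "__main__":
    print(asyncio.run(classify("Some text to classify.")))
```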