Fixes to llm calls and general logic updates

This commit is contained in:
Vasilije 2024-03-07 14:06:19 +01:00
parent b530f19e49
commit 06f701eb23
8 changed files with 114 additions and 17 deletions

View file

@ -4,7 +4,13 @@
"cell_type": "code",
"execution_count": null,
"id": "13be50c7-167c-4a03-bd75-53904baa1f8c",
"metadata": {},
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": []
},
@ -348,6 +354,58 @@
"system_prompt = f\"\"\" Classify content based on the following categories: {str(classification)}\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c7108ab-5cef-4530-8bea-3b3f96b5b302",
"metadata": {},
"outputs": [],
"source": [
"cognitive_architecture/modules/cognify/llm/classify_content.py"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "14ef9446-ec16-4657-9f83-a4c1c9ef2eba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/vasa/Projects/cognee\n"
]
}
],
"source": [
"import os\n",
"print(os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f97f11f1-4490-49ea-b193-1f858e72893b",
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'cognitive_architecture.modules.cognify.create_vector_memory'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodules\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcognify\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mllm\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclassify_content\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m content_to_cog_layers\n",
"File \u001b[0;32m~/Projects/cognee/cognitive_architecture/modules/cognify/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcreate_vector_memory\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m create_vector_memory\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.modules.cognify.create_vector_memory'"
]
}
],
"source": [
"from cognitive_architecture.modules.cognify.llm.classify_content import content_to_cog_layers"
]
},
{
"cell_type": "code",
"execution_count": 5,
@ -1778,10 +1836,22 @@
},
{
"cell_type": "code",
"execution_count": 226,
"execution_count": 4,
"id": "599cd4f9-4f8d-4321-83a5-fa153d029115",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'cognitive_architecture.openai_tools'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01masyncio\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopenai_tools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m async_get_embedding_with_backoff\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Ensure your async function is defined in a cell above or within this cell\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# async def run_async_function(text):\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 10\u001b[0m \n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Use asyncio.run() to run the async function\u001b[39;00m\n\u001b[1;32m 12\u001b[0m loop \u001b[38;5;241m=\u001b[39m asyncio\u001b[38;5;241m.\u001b[39mget_event_loop()\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.openai_tools'"
]
}
],
"source": [
"import asyncio\n",
"from cognitive_architecture.openai_tools import async_get_embedding_with_backoff\n",

View file

@ -157,7 +157,7 @@ class OpenAIAdapter(LLMInterface):
"""Wrapper around Embedding.create w/ backoff"""
return openai.embeddings.create(**kwargs)
def get_embedding_with_backoffself(self, text: str, model: str = "text-embedding-ada-002"):
def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
"""To get text embeddings, import/call this function
It specifies defaults + handles rate-limiting
:param text: str
@ -183,7 +183,8 @@ class OpenAIAdapter(LLMInterface):
async def acreate_structured_output(self, text_input: str, system_prompt_path: str, response_model: Type[BaseModel]) -> BaseModel:
"""Generate a response from a user query."""
system_prompt = read_query_prompt(system_prompt_path)
return self.aclient.chat.completions.create(
return await self.aclient.chat.completions.create(
model=self.model,
messages=[
{

View file

@ -1,3 +1,5 @@
You are a classification engine and should classify content. Make sure to use one of the existing classification options and not invent your own.
The possible classifications are:
{
"Natural Language Text": {
"type": "TEXT",

View file

@ -13,8 +13,14 @@ async def content_to_cog_layers(text_input: str,system_prompt_path:str, response
return await llm_client.acreate_structured_output(text_input,system_prompt_path, response_model)
# Your async function definitions and other code here...
if __name__ == "__main__":
import asyncio
asyncio.run(content_to_cog_layers("""Russia summons US ambassador in Moscow and says it will expel diplomats who meddle in its internal affairs
The Russian foreign ministry said on Thursday it had summoned the US ambassador in Moscow and warned her against attempts to interfere in the internal affairs of the Russian Federation, reports Reuters.
content_to_cog_layers("test", "test", ContentPrediction)
Ahead of a March presidential election, it said in a statement that such behaviour would be firmly and resolutely suppressed, up to and including the expulsion as persona non grata of US embassy staff involved in such actions.""", "classify_content.txt", ContentPrediction))

View file

@ -134,31 +134,31 @@ class ContentType(BaseModel):
type: str
class TextContent(ContentType):
type = "TEXT"
type:str = "TEXT"
subclass: List[TextSubclass]
class AudioContent(ContentType):
type = "AUDIO"
type:str = "AUDIO"
subclass: List[AudioSubclass]
class ImageContent(ContentType):
type = "IMAGE"
type:str = "IMAGE"
subclass: List[ImageSubclass]
class VideoContent(ContentType):
type = "VIDEO"
type:str = "VIDEO"
subclass: List[VideoSubclass]
class MultimediaContent(ContentType):
type = "MULTIMEDIA"
type:str = "MULTIMEDIA"
subclass: List[MultimediaSubclass]
class Model3DContent(ContentType):
type = "3D_MODEL"
type:str = "3D_MODEL"
subclass: List[Model3DSubclass]
class ProceduralContent(ContentType):
type = "PROCEDURAL"
type:str = "PROCEDURAL"
subclass: List[ProceduralSubclass]
class ContentPrediction(BaseModel):

View file

@ -287,7 +287,13 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
def read_query_prompt(filename: str) -> str:
"""Read a query prompt from a file."""
file_path = Path(filename)
script_directory = Path(__file__).parent
# Set the base directory relative to the script's directory
base_directory = script_directory.parent / "cognitive_architecture/infrastructure/llm/prompts"
# Construct the full file path
file_path = base_directory / filename
try:
return file_path.read_text()
except FileNotFoundError:

13
poetry.lock generated
View file

@ -2410,6 +2410,17 @@ typing-extensions = ">=4.7,<5"
[package.extras]
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
[[package]]
name = "overrides"
version = "7.7.0"
description = "A decorator to automatically detect mismatch when overriding a method."
optional = false
python-versions = ">=3.6"
files = [
{file = "overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49"},
{file = "overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a"},
]
[[package]]
name = "packaging"
version = "23.2"
@ -4234,4 +4245,4 @@ weaviate = []
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "3d583c04bdb28b91b12a9544ddca78e322abb21e980e32c3070ff7d0ac82acc6"
content-hash = "39962c40053f064940d94532130d182d5a0a0526c915c39a0c48f98f21321630"

View file

@ -40,6 +40,7 @@ pymupdf = "^1.23.25"
pandas = "^2.2.1"
greenlet = "^3.0.3"
ruff = "^0.2.2"
overrides = "^7.7.0"
[tool.poetry.extras]
dbt = ["dbt-core", "dbt-redshift", "dbt-bigquery", "dbt-duckdb", "dbt-snowflake", "dbt-athena-community", "dbt-databricks"]
@ -66,7 +67,7 @@ databricks = ["databricks-sql-connector"]
lancedb = ["lancedb"]
pinecone = ["pinecone-client"]
neo4j = ["neo4j", "py2neo"]
notebook =[ "ipykernel", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
notebook =[ "ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"