Fixes to llm calls and general logic updates

This commit is contained in:
Vasilije 2024-03-07 14:06:19 +01:00
parent b530f19e49
commit 06f701eb23
8 changed files with 114 additions and 17 deletions

View file

@ -4,7 +4,13 @@
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "13be50c7-167c-4a03-bd75-53904baa1f8c", "id": "13be50c7-167c-4a03-bd75-53904baa1f8c",
"metadata": {}, "metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
@ -348,6 +354,58 @@
"system_prompt = f\"\"\" Classify content based on the following categories: {str(classification)}\"\"\"" "system_prompt = f\"\"\" Classify content based on the following categories: {str(classification)}\"\"\""
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "8c7108ab-5cef-4530-8bea-3b3f96b5b302",
"metadata": {},
"outputs": [],
"source": [
"cognitive_architecture/modules/cognify/llm/classify_content.py"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "14ef9446-ec16-4657-9f83-a4c1c9ef2eba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/vasa/Projects/cognee\n"
]
}
],
"source": [
"import os\n",
"print(os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f97f11f1-4490-49ea-b193-1f858e72893b",
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'cognitive_architecture.modules.cognify.create_vector_memory'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodules\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcognify\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mllm\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclassify_content\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m content_to_cog_layers\n",
"File \u001b[0;32m~/Projects/cognee/cognitive_architecture/modules/cognify/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcreate_vector_memory\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m create_vector_memory\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.modules.cognify.create_vector_memory'"
]
}
],
"source": [
"from cognitive_architecture.modules.cognify.llm.classify_content import content_to_cog_layers"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 5,
@ -1778,10 +1836,22 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 226, "execution_count": 4,
"id": "599cd4f9-4f8d-4321-83a5-fa153d029115", "id": "599cd4f9-4f8d-4321-83a5-fa153d029115",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'cognitive_architecture.openai_tools'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01masyncio\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcognitive_architecture\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopenai_tools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m async_get_embedding_with_backoff\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Ensure your async function is defined in a cell above or within this cell\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# async def run_async_function(text):\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 10\u001b[0m \n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Use asyncio.run() to run the async function\u001b[39;00m\n\u001b[1;32m 12\u001b[0m loop \u001b[38;5;241m=\u001b[39m asyncio\u001b[38;5;241m.\u001b[39mget_event_loop()\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cognitive_architecture.openai_tools'"
]
}
],
"source": [ "source": [
"import asyncio\n", "import asyncio\n",
"from cognitive_architecture.openai_tools import async_get_embedding_with_backoff\n", "from cognitive_architecture.openai_tools import async_get_embedding_with_backoff\n",

View file

@ -157,7 +157,7 @@ class OpenAIAdapter(LLMInterface):
"""Wrapper around Embedding.create w/ backoff""" """Wrapper around Embedding.create w/ backoff"""
return openai.embeddings.create(**kwargs) return openai.embeddings.create(**kwargs)
def get_embedding_with_backoffself(self, text: str, model: str = "text-embedding-ada-002"): def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-ada-002"):
"""To get text embeddings, import/call this function """To get text embeddings, import/call this function
It specifies defaults + handles rate-limiting It specifies defaults + handles rate-limiting
:param text: str :param text: str
@ -183,7 +183,8 @@ class OpenAIAdapter(LLMInterface):
async def acreate_structured_output(self, text_input: str, system_prompt_path: str, response_model: Type[BaseModel]) -> BaseModel: async def acreate_structured_output(self, text_input: str, system_prompt_path: str, response_model: Type[BaseModel]) -> BaseModel:
"""Generate a response from a user query.""" """Generate a response from a user query."""
system_prompt = read_query_prompt(system_prompt_path) system_prompt = read_query_prompt(system_prompt_path)
return self.aclient.chat.completions.create(
return await self.aclient.chat.completions.create(
model=self.model, model=self.model,
messages=[ messages=[
{ {

View file

@ -1,3 +1,5 @@
You are a classification engine and should classify content. Make sure to use one of the existing classification options and do not invent your own.
The possible classifications are:
{ {
"Natural Language Text": { "Natural Language Text": {
"type": "TEXT", "type": "TEXT",

View file

@ -13,8 +13,14 @@ async def content_to_cog_layers(text_input: str,system_prompt_path:str, response
return await llm_client.acreate_structured_output(text_input,system_prompt_path, response_model) return await llm_client.acreate_structured_output(text_input,system_prompt_path, response_model)
# Your async function definitions and other code here...
if __name__ == "__main__": if __name__ == "__main__":
import asyncio
asyncio.run(content_to_cog_layers("""Russia summons US ambassador in Moscow and says it will expel diplomats who meddle in its internal affairs
The Russian foreign ministry said on Thursday it had summoned the US ambassador in Moscow and warned her against attempts to interfere in the internal affairs of the Russian Federation, reports Reuters.
content_to_cog_layers("test", "test", ContentPrediction) Ahead of a March presidential election, it said in a statement that such behaviour would be firmly and resolutely suppressed, up to and including the expulsion as persona non grata of US embassy staff involved in such actions.""", "classify_content.txt", ContentPrediction))

View file

@ -134,31 +134,31 @@ class ContentType(BaseModel):
type: str type: str
class TextContent(ContentType): class TextContent(ContentType):
type = "TEXT" type:str = "TEXT"
subclass: List[TextSubclass] subclass: List[TextSubclass]
class AudioContent(ContentType): class AudioContent(ContentType):
type = "AUDIO" type:str = "AUDIO"
subclass: List[AudioSubclass] subclass: List[AudioSubclass]
class ImageContent(ContentType): class ImageContent(ContentType):
type = "IMAGE" type:str = "IMAGE"
subclass: List[ImageSubclass] subclass: List[ImageSubclass]
class VideoContent(ContentType): class VideoContent(ContentType):
type = "VIDEO" type:str = "VIDEO"
subclass: List[VideoSubclass] subclass: List[VideoSubclass]
class MultimediaContent(ContentType): class MultimediaContent(ContentType):
type = "MULTIMEDIA" type:str = "MULTIMEDIA"
subclass: List[MultimediaSubclass] subclass: List[MultimediaSubclass]
class Model3DContent(ContentType): class Model3DContent(ContentType):
type = "3D_MODEL" type:str = "3D_MODEL"
subclass: List[Model3DSubclass] subclass: List[Model3DSubclass]
class ProceduralContent(ContentType): class ProceduralContent(ContentType):
type = "PROCEDURAL" type:str = "PROCEDURAL"
subclass: List[ProceduralSubclass] subclass: List[ProceduralSubclass]
class ContentPrediction(BaseModel): class ContentPrediction(BaseModel):

View file

@ -287,7 +287,13 @@ async def get_memory_name_by_doc_id(session: AsyncSession, docs_id: str):
def read_query_prompt(filename: str) -> str: def read_query_prompt(filename: str) -> str:
"""Read a query prompt from a file.""" """Read a query prompt from a file."""
file_path = Path(filename) script_directory = Path(__file__).parent
# Set the base directory relative to the script's directory
base_directory = script_directory.parent / "cognitive_architecture/infrastructure/llm/prompts"
# Construct the full file path
file_path = base_directory / filename
try: try:
return file_path.read_text() return file_path.read_text()
except FileNotFoundError: except FileNotFoundError:

13
poetry.lock generated
View file

@ -2410,6 +2410,17 @@ typing-extensions = ">=4.7,<5"
[package.extras] [package.extras]
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
[[package]]
name = "overrides"
version = "7.7.0"
description = "A decorator to automatically detect mismatch when overriding a method."
optional = false
python-versions = ">=3.6"
files = [
{file = "overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49"},
{file = "overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a"},
]
[[package]] [[package]]
name = "packaging" name = "packaging"
version = "23.2" version = "23.2"
@ -4234,4 +4245,4 @@ weaviate = []
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "3d583c04bdb28b91b12a9544ddca78e322abb21e980e32c3070ff7d0ac82acc6" content-hash = "39962c40053f064940d94532130d182d5a0a0526c915c39a0c48f98f21321630"

View file

@ -40,6 +40,7 @@ pymupdf = "^1.23.25"
pandas = "^2.2.1" pandas = "^2.2.1"
greenlet = "^3.0.3" greenlet = "^3.0.3"
ruff = "^0.2.2" ruff = "^0.2.2"
overrides = "^7.7.0"
[tool.poetry.extras] [tool.poetry.extras]
dbt = ["dbt-core", "dbt-redshift", "dbt-bigquery", "dbt-duckdb", "dbt-snowflake", "dbt-athena-community", "dbt-databricks"] dbt = ["dbt-core", "dbt-redshift", "dbt-bigquery", "dbt-duckdb", "dbt-snowflake", "dbt-athena-community", "dbt-databricks"]
@ -66,7 +67,7 @@ databricks = ["databricks-sql-connector"]
lancedb = ["lancedb"] lancedb = ["lancedb"]
pinecone = ["pinecone-client"] pinecone = ["pinecone-client"]
neo4j = ["neo4j", "py2neo"] neo4j = ["neo4j", "py2neo"]
notebook =[ "ipykernel", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"] notebook =[ "ipykernel","overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
pytest = "^7.4.0" pytest = "^7.4.0"