From befb8ac237e43ba602958da0369ec4e35735392c Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 8 Sep 2025 21:16:24 +0200 Subject: [PATCH] feat: Save baml progress --- cognee/infrastructure/llm/LLMGateway.py | 26 +-- .../baml/baml_client/async_client.py | 43 ++--- .../baml/baml_client/inlinedbaml.py | 2 +- .../baml/baml_client/parser.py | 8 +- .../baml/baml_client/stream_types.py | 16 +- .../baml/baml_client/sync_client.py | 43 ++--- .../baml/baml_client/type_builder.py | 170 ++++++------------ .../baml/baml_client/type_map.py | 6 +- .../baml/baml_client/types.py | 16 +- .../baml_src/acreate_structured_output.baml | 16 +- .../extraction/acreate_structured_output.py | 111 +++++++++++- cognee/infrastructure/llm/utils.py | 4 +- 12 files changed, 221 insertions(+), 240 deletions(-) diff --git a/cognee/infrastructure/llm/LLMGateway.py b/cognee/infrastructure/llm/LLMGateway.py index 02cc25c48..eb77b47b5 100644 --- a/cognee/infrastructure/llm/LLMGateway.py +++ b/cognee/infrastructure/llm/LLMGateway.py @@ -26,7 +26,7 @@ class LLMGateway: ) return acreate_structured_output( - content=text_input, + text_input=text_input, system_prompt=system_prompt, response_model=response_model, ) @@ -142,19 +142,19 @@ class LLMGateway: @staticmethod def extract_summary(content: str, response_model: Type[BaseModel]) -> Coroutine: - llm_config = get_llm_config() - if llm_config.structured_output_framework.upper() == "BAML": - from cognee.infrastructure.llm.structured_output_framework.baml.baml_src.extraction import ( - extract_summary, - ) + # llm_config = get_llm_config() + # if llm_config.structured_output_framework.upper() == "BAML": + # from cognee.infrastructure.llm.structured_output_framework.baml.baml_src.extraction import ( + # extract_summary, + # ) + # + # return extract_summary(content=content, response_model=response_model) + # else: + from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import ( + extract_summary, + ) - return extract_summary(content=content, response_model=response_model) - else: - from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import ( - extract_summary, - ) - - return extract_summary(content=content, response_model=response_model) + return extract_summary(content=content, response_model=response_model) @staticmethod def extract_event_graph(content: str, response_model: Type[BaseModel]) -> Coroutine: diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py index e69544c48..884b22a9f 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py @@ -81,19 +81,15 @@ class BamlAsyncClient: async def AcreateStructuredOutput( self, - content: str, + text_input: str, system_prompt: str, - user_prompt: str, baml_options: BamlCallOptions = {}, - ) -> types.DynamicOutputModel: + ) -> types.ResponseModel: # Check if on_tick is provided if "on_tick" in baml_options: # Use streaming internally when on_tick is provided stream = self.stream.AcreateStructuredOutput( - content=content, - system_prompt=system_prompt, - user_prompt=user_prompt, - baml_options=baml_options, + text_input=text_input, system_prompt=system_prompt, baml_options=baml_options ) return await stream.get_final_response() else: @@ -101,14 +97,12 @@ class BamlAsyncClient: result = await self.__options.merge_options(baml_options).call_function_async( function_name="AcreateStructuredOutput", args={ - "content": content, + "text_input": text_input, "system_prompt": system_prompt, - "user_prompt": user_prompt, }, ) return typing.cast( - types.DynamicOutputModel, - result.cast_to(types, types, stream_types, False, __runtime__), + types.ResponseModel, result.cast_to(types, types, stream_types, False, __runtime__) ) async def ExtractCategories( @@ -267,27 +261,24 @@ class BamlStreamClient: def AcreateStructuredOutput( self, - content: str, + text_input: str, system_prompt: str, - user_prompt: str, baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlStream[stream_types.DynamicOutputModel, types.DynamicOutputModel]: + ) -> baml_py.BamlStream[stream_types.ResponseModel, types.ResponseModel]: ctx, result = self.__options.merge_options(baml_options).create_async_stream( function_name="AcreateStructuredOutput", args={ - "content": content, + "text_input": text_input, "system_prompt": system_prompt, - "user_prompt": user_prompt, }, ) - return baml_py.BamlStream[stream_types.DynamicOutputModel, types.DynamicOutputModel]( + return baml_py.BamlStream[stream_types.ResponseModel, types.ResponseModel]( result, lambda x: typing.cast( - stream_types.DynamicOutputModel, - x.cast_to(types, types, stream_types, True, __runtime__), + stream_types.ResponseModel, x.cast_to(types, types, stream_types, True, __runtime__) ), lambda x: typing.cast( - types.DynamicOutputModel, x.cast_to(types, types, stream_types, False, __runtime__) + types.ResponseModel, x.cast_to(types, types, stream_types, False, __runtime__) ), ctx, ) @@ -444,17 +435,15 @@ class BamlHttpRequestClient: async def AcreateStructuredOutput( self, - content: str, + text_input: str, system_prompt: str, - user_prompt: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: result = await self.__options.merge_options(baml_options).create_http_request_async( function_name="AcreateStructuredOutput", args={ - "content": content, + "text_input": text_input, "system_prompt": system_prompt, - "user_prompt": user_prompt, }, mode="request", ) @@ -563,17 +552,15 @@ class BamlHttpStreamRequestClient: async def AcreateStructuredOutput( self, - content: str, + text_input: str, system_prompt: str, - user_prompt: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: result = await self.__options.merge_options(baml_options).create_http_request_async( function_name="AcreateStructuredOutput", args={ - "content": content, + "text_input": text_input, "system_prompt": system_prompt, - "user_prompt": user_prompt, }, mode="stream", ) diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py index 3de9a2753..63b39bbad 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py @@ -11,7 +11,7 @@ # baml-cli is available with the baml package. _file_map = { - "acreate_structured_output.baml": "class DynamicInputModel {\n test string\n @@dynamic\n}\n\nclass DynamicOutputModel {\n test string\n @@dynamic\n}\n\nfunction AcreateStructuredOutput(\n content: string,\n system_prompt: string,\n user_prompt: string,\n) -> DynamicOutputModel {\n client OpenAI\n\n prompt #\"\n {{ system_prompt }}\n {{ ctx.output_format }}\n {{ _.role('user') }}\n {{ user_prompt }}\n {{ content }}\n \"#\n}\n", + "acreate_structured_output.baml": "class ResponseModel {\n @@dynamic\n}\n\nfunction AcreateStructuredOutput(\n text_input: string,\n system_prompt: string,\n) -> ResponseModel {\n client OpenAI\n\n prompt #\"\n {{ system_prompt }}\n {{ ctx.output_format }}\n {{ _.role('user') }}\n {{ text_input }}\n \"#\n}\n", "extract_categories.baml": '// Content classification data models - matching shared/data_models.py\nclass TextContent {\n type string\n subclass string[]\n}\n\nclass AudioContent {\n type string\n subclass string[]\n}\n\nclass ImageContent {\n type string\n subclass string[]\n}\n\nclass VideoContent {\n type string\n subclass string[]\n}\n\nclass MultimediaContent {\n type string\n subclass string[]\n}\n\nclass Model3DContent {\n type string\n subclass string[]\n}\n\nclass ProceduralContent {\n type string\n subclass string[]\n}\n\nclass ContentLabel {\n content_type "text" | "audio" | "image" | "video" | "multimedia" | "3d_model" | "procedural"\n type string\n subclass string[]\n}\n\nclass DefaultContentPrediction {\n label ContentLabel\n}\n\n// Content classification prompt template\ntemplate_string ClassifyContentPrompt() #"\n You are a classification engine and should classify content. Make sure to use one of the existing classification options and not invent your own.\n\n Classify the content into one of these main categories and their relevant subclasses:\n\n **TEXT CONTENT** (content_type: "text"):\n - type: "TEXTUAL_DOCUMENTS_USED_FOR_GENERAL_PURPOSES"\n - subclass options: ["Articles, essays, and reports", "Books and manuscripts", "News stories and blog posts", "Research papers and academic publications", "Social media posts and comments", "Website content and product descriptions", "Personal narratives and stories", "Spreadsheets and tables", "Forms and surveys", "Databases and CSV files", "Source code in various programming languages", "Shell commands and scripts", "Markup languages (HTML, XML)", "Stylesheets (CSS) and configuration files (YAML, JSON, INI)", "Chat transcripts and messaging history", "Customer service logs and interactions", "Conversational AI training data", "Textbook content and lecture notes", "Exam questions and academic exercises", "E-learning course materials", "Poetry and prose", "Scripts for plays, movies, and television", "Song lyrics", "Manuals and user guides", "Technical specifications and API documentation", "Helpdesk articles and FAQs", "Contracts and agreements", "Laws, regulations, and legal case documents", "Policy documents and compliance materials", "Clinical trial reports", "Patient records and case notes", "Scientific journal articles", "Financial reports and statements", "Business plans and proposals", "Market research and analysis reports", "Ad copies and marketing slogans", "Product catalogs and brochures", "Press releases and promotional content", "Professional and formal correspondence", "Personal emails and letters", "Image and video captions", "Annotations and metadata for various media", "Vocabulary lists and grammar rules", "Language exercises and quizzes", "Other types of text data"]\n\n **AUDIO CONTENT** (content_type: "audio"):\n - type: "AUDIO_DOCUMENTS_USED_FOR_GENERAL_PURPOSES"\n - subclass options: ["Music tracks and albums", "Podcasts and radio broadcasts", "Audiobooks and audio guides", "Recorded interviews and speeches", "Sound effects and ambient sounds", "Other types of audio recordings"]\n\n **IMAGE CONTENT** (content_type: "image"):\n - type: "IMAGE_DOCUMENTS_USED_FOR_GENERAL_PURPOSES"\n - subclass options: ["Photographs and digital images", "Illustrations, diagrams, and charts", "Infographics and visual data representations", "Artwork and paintings", "Screenshots and graphical user interfaces", "Other types of images"]\n\n **VIDEO CONTENT** (content_type: "video"):\n - type: "VIDEO_DOCUMENTS_USED_FOR_GENERAL_PURPOSES"\n - subclass options: ["Movies and short films", "Documentaries and educational videos", "Video tutorials and how-to guides", "Animated features and cartoons", "Live event recordings and sports broadcasts", "Other types of video content"]\n\n **MULTIMEDIA CONTENT** (content_type: "multimedia"):\n - type: "MULTIMEDIA_DOCUMENTS_USED_FOR_GENERAL_PURPOSES"\n - subclass options: ["Interactive web content and games", "Virtual reality (VR) and augmented reality (AR) experiences", "Mixed media presentations and slide decks", "E-learning modules with integrated multimedia", "Digital exhibitions and virtual tours", "Other types of multimedia content"]\n\n **3D MODEL CONTENT** (content_type: "3d_model"):\n - type: "3D_MODEL_DOCUMENTS_USED_FOR_GENERAL_PURPOSES"\n - subclass options: ["Architectural renderings and building plans", "Product design models and prototypes", "3D animations and character models", "Scientific simulations and visualizations", "Virtual objects for AR/VR applications", "Other types of 3D models"]\n\n **PROCEDURAL CONTENT** (content_type: "procedural"):\n - type: "PROCEDURAL_DOCUMENTS_USED_FOR_GENERAL_PURPOSES"\n - subclass options: ["Tutorials and step-by-step guides", "Workflow and process descriptions", "Simulation and training exercises", "Recipes and crafting instructions", "Other types of procedural content"]\n\n Select the most appropriate content_type, type, and relevant subclasses.\n"#\n\n// OpenAI client defined once for all BAML files\n\n// Classification function\nfunction ExtractCategories(content: string) -> DefaultContentPrediction {\n client OpenAI\n\n prompt #"\n {{ ClassifyContentPrompt() }}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\n// Test case for classification\ntest ExtractCategoriesExample {\n functions [ExtractCategories]\n args {\n content #"\n Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval.\n It deals with the interaction between computers and human language, in particular how to program computers to process and analyze large amounts of natural language data.\n "#\n }\n}\n', "extract_content_graph.baml": 'class Node {\n id string\n name string\n type string\n description string\n @@dynamic\n}\n\n/// doc string for edge\nclass Edge {\n /// doc string for source_node_id\n source_node_id string\n target_node_id string\n relationship_name string\n}\n\nclass KnowledgeGraph {\n nodes (Node @stream.done)[]\n edges Edge[]\n}\n\n// Summarization classes\nclass SummarizedContent {\n summary string\n description string\n}\n\nclass SummarizedFunction {\n name string\n description string\n inputs string[]?\n outputs string[]?\n decorators string[]?\n}\n\nclass SummarizedClass {\n name string\n description string\n methods SummarizedFunction[]?\n decorators string[]?\n}\n\nclass SummarizedCode {\n high_level_summary string\n key_features string[]\n imports string[]\n constants string[]\n classes SummarizedClass[]\n functions SummarizedFunction[]\n workflow_description string?\n}\n\nclass DynamicKnowledgeGraph {\n @@dynamic\n}\n\n\n// Simple template for basic extraction (fast, good quality)\ntemplate_string ExtractContentGraphPrompt() #"\n You are an advanced algorithm that extracts structured data into a knowledge graph.\n\n - **Nodes**: Entities/concepts (like Wikipedia articles).\n - **Edges**: Relationships (like Wikipedia links). Use snake_case (e.g., `acted_in`).\n\n **Rules:**\n\n 1. **Node Labeling & IDs**\n - Use basic types only (e.g., "Person", "Date", "Organization").\n - Avoid overly specific or generic terms (e.g., no "Mathematician" or "Entity").\n - Node IDs must be human-readable names from the text (no numbers).\n\n 2. **Dates & Numbers**\n - Label dates as **"Date"** in "YYYY-MM-DD" format (use available parts if incomplete).\n - Properties are key-value pairs; do not use escaped quotes.\n\n 3. **Coreference Resolution**\n - Use a single, complete identifier for each entity (e.g., always "John Doe" not "Joe" or "he").\n\n 4. **Relationship Labels**:\n - Use descriptive, lowercase, snake_case names for edges.\n - *Example*: born_in, married_to, invented_by.\n - Avoid vague or generic labels like isA, relatesTo, has.\n - Avoid duplicated relationships like produces, produced by.\n\n 5. **Strict Compliance**\n - Follow these rules exactly. Non-compliance results in termination.\n"#\n\n// Summarization prompt template\ntemplate_string SummarizeContentPrompt() #"\n You are a top-tier summarization engine. Your task is to summarize text and make it versatile.\n Be brief and concise, but keep the important information and the subject.\n Use synonym words where possible in order to change the wording but keep the meaning.\n"#\n\n// Code summarization prompt template\ntemplate_string SummarizeCodePrompt() #"\n You are an expert code analyst. Analyze the provided source code and extract key information:\n\n 1. Provide a high-level summary of what the code does\n 2. List key features and functionality\n 3. Identify imports and dependencies\n 4. List constants and global variables\n 5. Summarize classes with their methods\n 6. Summarize standalone functions\n 7. Describe the overall workflow if applicable\n\n Be precise and technical while remaining clear and concise.\n"#\n\n// Detailed template for complex extraction (slower, higher quality)\ntemplate_string DetailedExtractContentGraphPrompt() #"\n You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.\n **Nodes** represent entities and concepts. They\'re akin to Wikipedia nodes.\n **Edges** represent relationships between concepts. They\'re akin to Wikipedia links.\n\n The aim is to achieve simplicity and clarity in the knowledge graph.\n\n # 1. Labeling Nodes\n **Consistency**: Ensure you use basic or elementary types for node labels.\n - For example, when you identify an entity representing a person, always label it as **"Person"**.\n - Avoid using more specific terms like "Mathematician" or "Scientist", keep those as "profession" property.\n - Don\'t use too generic terms like "Entity".\n **Node IDs**: Never utilize integers as node IDs.\n - Node IDs should be names or human-readable identifiers found in the text.\n\n # 2. Handling Numerical Data and Dates\n - For example, when you identify an entity representing a date, make sure it has type **"Date"**.\n - Extract the date in the format "YYYY-MM-DD"\n - If not possible to extract the whole date, extract month or year, or both if available.\n - **Property Format**: Properties must be in a key-value format.\n - **Quotation Marks**: Never use escaped single or double quotes within property values.\n - **Naming Convention**: Use snake_case for relationship names, e.g., `acted_in`.\n\n # 3. Coreference Resolution\n - **Maintain Entity Consistency**: When extracting entities, it\'s vital to ensure consistency.\n If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),\n always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the Person\'s ID.\n Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.\n\n # 4. Strict Compliance\n Adhere to the rules strictly. Non-compliance will result in termination.\n"#\n\n// Guided template with step-by-step instructions\ntemplate_string GuidedExtractContentGraphPrompt() #"\n You are an advanced algorithm designed to extract structured information to build a clean, consistent, and human-readable knowledge graph.\n\n **Objective**:\n - Nodes represent entities and concepts, similar to Wikipedia articles.\n - Edges represent typed relationships between nodes, similar to Wikipedia hyperlinks.\n - The graph must be clear, minimal, consistent, and semantically precise.\n\n **Node Guidelines**:\n\n 1. **Label Consistency**:\n - Use consistent, basic types for all node labels.\n - Do not switch between granular or vague labels for the same kind of entity.\n - Pick one label for each category and apply it uniformly.\n - Each entity type should be in a singular form and in a case of multiple words separated by whitespaces\n\n 2. **Node Identifiers**:\n - Node IDs must be human-readable and derived directly from the text.\n - Prefer full names and canonical terms.\n - Never use integers or autogenerated IDs.\n - *Example*: Use "Marie Curie", "Theory of Evolution", "Google".\n\n 3. **Coreference Resolution**:\n - Maintain one consistent node ID for each real-world entity.\n - Resolve aliases, acronyms, and pronouns to the most complete form.\n - *Example*: Always use "John Doe" even if later referred to as "Doe" or "he".\n\n **Edge Guidelines**:\n\n 4. **Relationship Labels**:\n - Use descriptive, lowercase, snake_case names for edges.\n - *Example*: born_in, married_to, invented_by.\n - Avoid vague or generic labels like isA, relatesTo, has.\n\n 5. **Relationship Direction**:\n - Edges must be directional and logically consistent.\n - *Example*:\n - "Marie Curie" —[born_in]→ "Warsaw"\n - "Radioactivity" —[discovered_by]→ "Marie Curie"\n\n **Compliance**:\n Strict adherence to these guidelines is required. Any deviation will result in immediate termination of the task.\n"#\n\n// Strict template with zero-tolerance rules\ntemplate_string StrictExtractContentGraphPrompt() #"\n You are a top-tier algorithm for **extracting structured information** from unstructured text to build a **knowledge graph**.\n\n Your primary goal is to extract:\n - **Nodes**: Representing **entities** and **concepts** (like Wikipedia nodes).\n - **Edges**: Representing **relationships** between those concepts (like Wikipedia links).\n\n The resulting knowledge graph must be **simple, consistent, and human-readable**.\n\n ## 1. Node Labeling and Identification\n\n ### Node Types\n Use **basic atomic types** for node labels. Always prefer general types over specific roles or professions:\n - "Person" for any human.\n - "Organization" for companies, institutions, etc.\n - "Location" for geographic or place entities.\n - "Date" for any temporal expression.\n - "Event" for historical or scheduled occurrences.\n - "Work" for books, films, artworks, or research papers.\n - "Concept" for abstract notions or ideas.\n\n ### Node IDs\n - Always assign **human-readable and unambiguous identifiers**.\n - Never use numeric or autogenerated IDs.\n - Prioritize **most complete form** of entity names for consistency.\n\n ## 2. Relationship Handling\n - Use **snake_case** for all relationship (edge) types.\n - Keep relationship types semantically clear and consistent.\n - Avoid vague relation names like "related_to" unless no better alternative exists.\n\n ## 3. Strict Compliance\n Follow all rules exactly. Any deviation may lead to rejection or incorrect graph construction.\n"#\n\n// OpenAI client with environment model selection\nclient OpenAI {\n provider openai\n options {\n model client_registry.model\n api_key client_registry.api_key\n }\n}\n\n\n\n// Function that returns raw structured output (for custom objects - to be handled in Python)\nfunction ExtractContentGraphGeneric(\n content: string,\n mode: "simple" | "base" | "guided" | "strict" | "custom"?,\n custom_prompt_content: string?\n) -> KnowledgeGraph {\n client OpenAI\n\n prompt #"\n {% if mode == "base" %}\n {{ DetailedExtractContentGraphPrompt() }}\n {% elif mode == "guided" %}\n {{ GuidedExtractContentGraphPrompt() }}\n {% elif mode == "strict" %}\n {{ StrictExtractContentGraphPrompt() }}\n {% elif mode == "custom" and custom_prompt_content %}\n {{ custom_prompt_content }}\n {% else %}\n {{ ExtractContentGraphPrompt() }}\n {% endif %}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n Before answering, briefly describe what you\'ll extract from the text, then provide the structured output.\n\n Example format:\n I\'ll extract the main entities and their relationships from this text...\n\n { ... }\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\n// Backward-compatible function specifically for KnowledgeGraph\nfunction ExtractDynamicContentGraph(\n content: string,\n mode: "simple" | "base" | "guided" | "strict" | "custom"?,\n custom_prompt_content: string?\n) -> DynamicKnowledgeGraph {\n client OpenAI\n\n prompt #"\n {% if mode == "base" %}\n {{ DetailedExtractContentGraphPrompt() }}\n {% elif mode == "guided" %}\n {{ GuidedExtractContentGraphPrompt() }}\n {% elif mode == "strict" %}\n {{ StrictExtractContentGraphPrompt() }}\n {% elif mode == "custom" and custom_prompt_content %}\n {{ custom_prompt_content }}\n {% else %}\n {{ ExtractContentGraphPrompt() }}\n {% endif %}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n Before answering, briefly describe what you\'ll extract from the text, then provide the structured output.\n\n Example format:\n I\'ll extract the main entities and their relationships from this text...\n\n { ... }\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\n\n// Summarization functions\nfunction SummarizeContent(content: string) -> SummarizedContent {\n client OpenAI\n\n prompt #"\n {{ SummarizeContentPrompt() }}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\nfunction SummarizeCode(content: string) -> SummarizedCode {\n client OpenAI\n\n prompt #"\n {{ SummarizeCodePrompt() }}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\ntest ExtractStrictExample {\n functions [ExtractContentGraphGeneric]\n args {\n content #"\n The Python programming language was created by Guido van Rossum in 1991.\n "#\n mode "strict"\n }\n}\n', "generators.baml": '// This helps use auto generate libraries you can use in the language of\n// your choice. You can have multiple generators if you use multiple languages.\n// Just ensure that the output_dir is different for each generator.\ngenerator target {\n // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"\n output_type "python/pydantic"\n\n // Where the generated code will be saved (relative to baml_src/)\n output_dir "../"\n\n // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).\n // The BAML VSCode extension version should also match this version.\n version "0.206.0"\n\n // Valid values: "sync", "async"\n // This controls what `b.FunctionName()` will be (sync or async).\n default_client_mode async\n}\n', diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py index 7482217fd..d917bb7f5 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py @@ -27,11 +27,11 @@ class LlmResponseParser: self, llm_response: str, baml_options: BamlCallOptions = {}, - ) -> types.DynamicOutputModel: + ) -> types.ResponseModel: result = self.__options.merge_options(baml_options).parse_response( function_name="AcreateStructuredOutput", llm_response=llm_response, mode="request" ) - return typing.cast(types.DynamicOutputModel, result) + return typing.cast(types.ResponseModel, result) def ExtractCategories( self, @@ -94,11 +94,11 @@ class LlmStreamParser: self, llm_response: str, baml_options: BamlCallOptions = {}, - ) -> stream_types.DynamicOutputModel: + ) -> stream_types.ResponseModel: result = self.__options.merge_options(baml_options).parse_response( function_name="AcreateStructuredOutput", llm_response=llm_response, mode="stream" ) - return typing.cast(stream_types.DynamicOutputModel, result) + return typing.cast(stream_types.ResponseModel, result) def ExtractCategories( self, diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py index 7c60ebab1..f1740467e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py @@ -27,7 +27,7 @@ class StreamState(BaseModel, typing.Generic[StreamStateValueT]): # ######################################################################### -# Generated classes (19) +# Generated classes (18) # ######################################################################### @@ -46,20 +46,10 @@ class DefaultContentPrediction(BaseModel): label: typing.Optional["ContentLabel"] = None -class DynamicInputModel(BaseModel): - model_config = ConfigDict(extra="allow") - test: typing.Optional[str] = None - - class DynamicKnowledgeGraph(BaseModel): model_config = ConfigDict(extra="allow") -class DynamicOutputModel(BaseModel): - model_config = ConfigDict(extra="allow") - test: typing.Optional[str] = None - - class Edge(BaseModel): # doc string for edge # doc string for source_node_id @@ -102,6 +92,10 @@ class ProceduralContent(BaseModel): subclass: typing.List[str] +class ResponseModel(BaseModel): + model_config = ConfigDict(extra="allow") + + class SummarizedClass(BaseModel): name: typing.Optional[str] = None description: typing.Optional[str] = None diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py index 148cf2ea8..bf7cfb5ce 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py @@ -94,18 +94,14 @@ class BamlSyncClient: def AcreateStructuredOutput( self, - content: str, + text_input: str, system_prompt: str, - user_prompt: str, baml_options: BamlCallOptions = {}, - ) -> types.DynamicOutputModel: + ) -> types.ResponseModel: # Check if on_tick is provided if "on_tick" in baml_options: stream = self.stream.AcreateStructuredOutput( - content=content, - system_prompt=system_prompt, - user_prompt=user_prompt, - baml_options=baml_options, + text_input=text_input, system_prompt=system_prompt, baml_options=baml_options ) return stream.get_final_response() else: @@ -113,14 +109,12 @@ class BamlSyncClient: result = self.__options.merge_options(baml_options).call_function_sync( function_name="AcreateStructuredOutput", args={ - "content": content, + "text_input": text_input, "system_prompt": system_prompt, - "user_prompt": user_prompt, }, ) return typing.cast( - types.DynamicOutputModel, - result.cast_to(types, types, stream_types, False, __runtime__), + types.ResponseModel, result.cast_to(types, types, stream_types, False, __runtime__) ) def ExtractCategories( @@ -274,27 +268,24 @@ class BamlStreamClient: def AcreateStructuredOutput( self, - content: str, + text_input: str, system_prompt: str, - user_prompt: str, baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlSyncStream[stream_types.DynamicOutputModel, types.DynamicOutputModel]: + ) -> baml_py.BamlSyncStream[stream_types.ResponseModel, types.ResponseModel]: ctx, result = self.__options.merge_options(baml_options).create_sync_stream( function_name="AcreateStructuredOutput", args={ - "content": content, + "text_input": text_input, "system_prompt": system_prompt, - "user_prompt": user_prompt, }, ) - return baml_py.BamlSyncStream[stream_types.DynamicOutputModel, types.DynamicOutputModel]( + return baml_py.BamlSyncStream[stream_types.ResponseModel, types.ResponseModel]( result, lambda x: typing.cast( - stream_types.DynamicOutputModel, - x.cast_to(types, types, stream_types, True, __runtime__), + stream_types.ResponseModel, x.cast_to(types, types, stream_types, True, __runtime__) ), lambda x: typing.cast( - types.DynamicOutputModel, x.cast_to(types, types, stream_types, False, __runtime__) + types.ResponseModel, x.cast_to(types, types, stream_types, False, __runtime__) ), ctx, ) @@ -455,17 +446,15 @@ class BamlHttpRequestClient: def AcreateStructuredOutput( self, - content: str, + text_input: str, system_prompt: str, - user_prompt: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: result = self.__options.merge_options(baml_options).create_http_request_sync( function_name="AcreateStructuredOutput", args={ - "content": content, + "text_input": text_input, "system_prompt": system_prompt, - "user_prompt": user_prompt, }, mode="request", ) @@ -574,17 +563,15 @@ class BamlHttpStreamRequestClient: def AcreateStructuredOutput( self, - content: str, + text_input: str, system_prompt: str, - user_prompt: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: result = self.__options.merge_options(baml_options).create_http_request_sync( function_name="AcreateStructuredOutput", args={ - "content": content, + "text_input": text_input, "system_prompt": system_prompt, - "user_prompt": user_prompt, }, mode="stream", ) diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py index d9c704a8f..ac8736e3f 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py @@ -27,9 +27,7 @@ class TypeBuilder(type_builder.TypeBuilder): "AudioContent", "ContentLabel", "DefaultContentPrediction", - "DynamicInputModel", "DynamicKnowledgeGraph", - "DynamicOutputModel", "Edge", "ImageContent", "KnowledgeGraph", @@ -37,6 +35,7 @@ class TypeBuilder(type_builder.TypeBuilder): "MultimediaContent", "Node", "ProceduralContent", + "ResponseModel", "SummarizedClass", "SummarizedCode", "SummarizedContent", @@ -54,7 +53,7 @@ class TypeBuilder(type_builder.TypeBuilder): # ######################################################################### # ######################################################################### - # Generated classes 19 + # Generated classes 18 # ######################################################################### @property @@ -69,18 +68,10 @@ class TypeBuilder(type_builder.TypeBuilder): def DefaultContentPrediction(self) -> "DefaultContentPredictionViewer": return DefaultContentPredictionViewer(self) - @property - def DynamicInputModel(self) -> "DynamicInputModelBuilder": - return DynamicInputModelBuilder(self) - @property def DynamicKnowledgeGraph(self) -> "DynamicKnowledgeGraphBuilder": return DynamicKnowledgeGraphBuilder(self) - @property - def DynamicOutputModel(self) -> "DynamicOutputModelBuilder": - return DynamicOutputModelBuilder(self) - @property def Edge(self) -> "EdgeViewer": return EdgeViewer(self) @@ -109,6 +100,10 @@ class TypeBuilder(type_builder.TypeBuilder): def ProceduralContent(self) -> "ProceduralContentViewer": return ProceduralContentViewer(self) + @property + def ResponseModel(self) -> "ResponseModelBuilder": + return ResponseModelBuilder(self) + @property def SummarizedClass(self) -> "SummarizedClassViewer": return SummarizedClassViewer(self) @@ -140,7 +135,7 @@ class TypeBuilder(type_builder.TypeBuilder): # ######################################################################### -# Generated classes 19 +# Generated classes 18 # ######################################################################### @@ -279,59 +274,6 @@ class DefaultContentPredictionProperties: return type_builder.ClassPropertyViewer(self.__bldr.property("label")) -class DynamicInputModelAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("DynamicInputModel") - self._properties: typing.Set[str] = set( - [ - "test", - ] - ) - self._props = DynamicInputModelProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def props(self) -> "DynamicInputModelProperties": - return self._props - - -class DynamicInputModelBuilder(DynamicInputModelAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - def add_property(self, name: str, type: baml_py.FieldType) -> baml_py.ClassPropertyBuilder: - if name in self._properties: - raise ValueError(f"Property {name} already exists.") - return self._bldr.property(name).type(type) - - def list_properties(self) -> typing.List[typing.Tuple[str, baml_py.ClassPropertyBuilder]]: - return self._bldr.list_properties() - - def remove_property(self, name: str) -> None: - self._bldr.remove_property(name) - - def reset(self) -> None: - self._bldr.reset() - - -class DynamicInputModelProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - def __getattr__(self, name: str) -> baml_py.ClassPropertyBuilder: - if name not in self.__properties: - raise AttributeError(f"Property {name} not found.") - return self.__bldr.property(name) - - @property - def test(self) -> baml_py.ClassPropertyBuilder: - return self.__bldr.property("test") - - class DynamicKnowledgeGraphAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) @@ -377,59 +319,6 @@ class DynamicKnowledgeGraphProperties: return self.__bldr.property(name) -class DynamicOutputModelAst: - def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) - self._bldr = _tb.class_("DynamicOutputModel") - self._properties: typing.Set[str] = set( - [ - "test", - ] - ) - self._props = DynamicOutputModelProperties(self._bldr, self._properties) - - def type(self) -> baml_py.FieldType: - return self._bldr.field() - - @property - def props(self) -> "DynamicOutputModelProperties": - return self._props - - -class DynamicOutputModelBuilder(DynamicOutputModelAst): - def __init__(self, tb: type_builder.TypeBuilder): - super().__init__(tb) - - def add_property(self, name: str, type: baml_py.FieldType) -> baml_py.ClassPropertyBuilder: - if name in self._properties: - raise ValueError(f"Property {name} already exists.") - return self._bldr.property(name).type(type) - - def list_properties(self) -> typing.List[typing.Tuple[str, baml_py.ClassPropertyBuilder]]: - return self._bldr.list_properties() - - def remove_property(self, name: str) -> None: - self._bldr.remove_property(name) - - def reset(self) -> None: - self._bldr.reset() - - -class DynamicOutputModelProperties: - def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): - self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 - - def __getattr__(self, name: str) -> baml_py.ClassPropertyBuilder: - if name not in self.__properties: - raise AttributeError(f"Property {name} not found.") - return self.__bldr.property(name) - - @property - def test(self) -> baml_py.ClassPropertyBuilder: - return self.__bldr.property("test") - - class EdgeAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) @@ -773,6 +662,51 @@ class ProceduralContentProperties: return type_builder.ClassPropertyViewer(self.__bldr.property("subclass")) +class ResponseModelAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("ResponseModel") + self._properties: typing.Set[str] = set([]) + self._props = ResponseModelProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "ResponseModelProperties": + return self._props + + +class ResponseModelBuilder(ResponseModelAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + def add_property(self, name: str, type: baml_py.FieldType) -> baml_py.ClassPropertyBuilder: + if name in self._properties: + raise ValueError(f"Property {name} already exists.") + return self._bldr.property(name).type(type) + + def list_properties(self) -> typing.List[typing.Tuple[str, baml_py.ClassPropertyBuilder]]: + return self._bldr.list_properties() + + def remove_property(self, name: str) -> None: + self._bldr.remove_property(name) + + def reset(self) -> None: + self._bldr.reset() + + +class ResponseModelProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + def __getattr__(self, name: str) -> baml_py.ClassPropertyBuilder: + if name not in self.__properties: + raise AttributeError(f"Property {name} not found.") + return self.__bldr.property(name) + + class SummarizedClassAst: def __init__(self, tb: type_builder.TypeBuilder): _tb = tb._tb # type: ignore (we know how to use this private attribute) diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py index 2e0427595..1774465ee 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py @@ -21,12 +21,8 @@ type_map = { "stream_types.ContentLabel": stream_types.ContentLabel, "types.DefaultContentPrediction": types.DefaultContentPrediction, "stream_types.DefaultContentPrediction": stream_types.DefaultContentPrediction, - "types.DynamicInputModel": types.DynamicInputModel, - "stream_types.DynamicInputModel": stream_types.DynamicInputModel, "types.DynamicKnowledgeGraph": types.DynamicKnowledgeGraph, "stream_types.DynamicKnowledgeGraph": stream_types.DynamicKnowledgeGraph, - "types.DynamicOutputModel": types.DynamicOutputModel, - "stream_types.DynamicOutputModel": stream_types.DynamicOutputModel, "types.Edge": types.Edge, "stream_types.Edge": stream_types.Edge, "types.ImageContent": types.ImageContent, @@ -41,6 +37,8 @@ type_map = { "stream_types.Node": stream_types.Node, "types.ProceduralContent": types.ProceduralContent, "stream_types.ProceduralContent": stream_types.ProceduralContent, + "types.ResponseModel": types.ResponseModel, + "stream_types.ResponseModel": stream_types.ResponseModel, "types.SummarizedClass": types.SummarizedClass, "stream_types.SummarizedClass": stream_types.SummarizedClass, "types.SummarizedCode": types.SummarizedCode, diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py index fd1b10eef..4d4453b1e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py @@ -48,7 +48,7 @@ def all_succeeded(checks: typing.Dict[CheckName, Check]) -> bool: # ######################################################################### # ######################################################################### -# Generated classes (19) +# Generated classes (18) # ######################################################################### @@ -75,20 +75,10 @@ class DefaultContentPrediction(BaseModel): label: "ContentLabel" -class DynamicInputModel(BaseModel): - model_config = ConfigDict(extra="allow") - test: str - - class DynamicKnowledgeGraph(BaseModel): model_config = ConfigDict(extra="allow") -class DynamicOutputModel(BaseModel): - model_config = ConfigDict(extra="allow") - test: str - - class Edge(BaseModel): # doc string for edge # doc string for source_node_id @@ -131,6 +121,10 @@ class ProceduralContent(BaseModel): subclass: typing.List[str] +class ResponseModel(BaseModel): + model_config = ConfigDict(extra="allow") + + class SummarizedClass(BaseModel): name: str description: str diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml index 36f434e18..2ecddb8cb 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml @@ -1,25 +1,17 @@ -class DynamicInputModel { - test string - @@dynamic -} - -class DynamicOutputModel { - test string +class ResponseModel { @@dynamic } function AcreateStructuredOutput( - content: string, + text_input: string, system_prompt: string, - user_prompt: string, -) -> DynamicOutputModel { +) -> ResponseModel { client OpenAI prompt #" {{ system_prompt }} {{ ctx.output_format }} {{ _.role('user') }} - {{ user_prompt }} - {{ content }} + {{ text_input }} "# } diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py index e9e8b8b6b..a3f8bdbe9 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py @@ -1,18 +1,113 @@ -import os import asyncio from typing import Type -from pydantic import BaseModel from cognee.shared.logging_utils import get_logger from cognee.shared.data_models import SummarizedCode -from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b from cognee.infrastructure.llm.config import get_llm_config +from typing import List, Dict, Union, Optional, Literal +from enum import Enum +from baml_py import Image, Audio, Video, Pdf +from datetime import datetime + +from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.type_builder import ( + TypeBuilder, +) +from cognee.infrastructure.llm.structured_output_framework.baml.baml_client import b +from pydantic import BaseModel + logger = get_logger("extract_summary_baml") +def create_dynamic_baml_type(pydantic_model): + tb = TypeBuilder() + + # if pydantic_model == str: + # b.ResponseModel.add_property("text", tb.string()) + # return tb + # + # def map_type(field_type, field_info): + # # Handle Optional/Union types + # if getattr(field_type, "__origin__", None) == Union: + # # Extract types from Union + # types = field_type.__args__ + # # Handle Optional (Union with NoneType) + # if type(None) in types: + # inner_type = next(t for t in types if t != type(None)) + # return map_type(inner_type, field_info).optional() + # # Handle regular Union + # mapped_types = [map_type(t, field_info) for t in types] + # return tb.union(*mapped_types) + # + # # Handle Lists + # if getattr(field_type, "__origin__", None) == list: + # inner_type = field_type.__args__[0] + # return map_type(inner_type, field_info).list() + # + # # Handle Maps/Dictionaries + # if getattr(field_type, "__origin__", None) == dict: + # key_type, value_type = field_type.__args__ + # # BAML only supports string or enum keys in maps + # if key_type not in [str, Enum]: + # raise ValueError("Map keys must be strings or enums in BAML") + # return tb.map(map_type(key_type, field_info), map_type(value_type, field_info)) + # + # # Handle Literal types + # if getattr(field_type, "__origin__", None) == Literal: + # literal_values = field_type.__args__ + # return tb.union(*[tb.literal(val) for val in literal_values]) + # + # # Handle Enums + # if isinstance(field_type, type) and issubclass(field_type, Enum): + # enum_type = tb.add_enum(field_type.__name__) + # for member in field_type: + # enum_type.add_value(member.name) + # return enum_type.type() + # + # # Handle primitive and special types + # type_mapping = { + # str: tb.string(), + # int: tb.int(), + # float: tb.float(), + # bool: tb.bool(), + # Image: tb.image(), + # Audio: tb.audio(), + # Video: tb.video(), + # Pdf: tb.pdf(), + # # datetime is not natively supported in BAML, map to string + # datetime: tb.string(), + # } + # + # # Handle nested BaseModel classes + # if isinstance(field_type, type) and issubclass(field_type, BaseModel): + # nested_tb = create_dynamic_baml_type(field_type) + # # Get the last created class from the nested TypeBuilder + # return nested_tb.get_last_class().type() + # + # if field_type in type_mapping: + # return type_mapping[field_type] + # + # raise ValueError(f"Unsupported type: {field_type}") + # + # fields = pydantic_model.model_fields + # + # # Add fields + # for field_name, field_info in fields.items(): + # field_type = field_info.annotation + # baml_type = map_type(field_type, field_info) + # + # # Add property with type + # prop = b.ResponseModel.add_property(field_name, baml_type) + # + # # Add description if available + # if field_info.description: + # prop.description(field_info.description) + + return tb + + async def acreate_structured_output( - content: str, system_prompt: str, user_prompt: str, response_model: Type[BaseModel] + text_input: str, system_prompt: str, response_model: Type[BaseModel] ): """ Extract summary using BAML framework. @@ -26,12 +121,12 @@ async def acreate_structured_output( """ config = get_llm_config() - # Use BAML's SummarizeContent function + type_builder = create_dynamic_baml_type(response_model) + result = await b.AcreateStructuredOutput( - content=content, + text_input=text_input, system_prompt=system_prompt, - user_prompt=user_prompt, - baml_options={"client_registry": config.baml_registry}, + baml_options={"client_registry": config.baml_registry, "tb": type_builder}, ) return result diff --git a/cognee/infrastructure/llm/utils.py b/cognee/infrastructure/llm/utils.py index cb88fa85e..7dd16d7ba 100644 --- a/cognee/infrastructure/llm/utils.py +++ b/cognee/infrastructure/llm/utils.py @@ -3,6 +3,7 @@ import litellm from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import ( get_llm_client, ) +from cognee.infrastructure.llm.LLMGateway import LLMGateway from cognee.shared.logging_utils import get_logger logger = get_logger() @@ -76,8 +77,7 @@ async def test_llm_connection(): the connection attempt and re-raise the exception for further handling. """ try: - llm_adapter = get_llm_client() - await llm_adapter.acreate_structured_output( + await LLMGateway.acreate_structured_output( text_input="test", system_prompt='Respond to me with the following string: "test"', response_model=str,