refactor: Initial baml refactor commit

This commit is contained in:
Igor Ilic 2025-09-08 11:33:15 +02:00
parent ce130f358a
commit 7c27546951
14 changed files with 335 additions and 10 deletions

View file

@ -19,6 +19,17 @@ class LLMGateway:
def acreate_structured_output(
text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> Coroutine:
llm_config = get_llm_config()
if llm_config.structured_output_framework.upper() == "BAML":
from cognee.infrastructure.llm.structured_output_framework.baml.baml_src.extraction import (
acreate_structured_output,
)
return acreate_structured_output(
content=text_input,
system_prompt=system_prompt,
response_model=response_model,
)
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
get_llm_client,
)

View file

@ -39,7 +39,7 @@ with EnsureBamlPyImport(__version__) as e:
from . import config
from .config import reset_baml_env_vars
from .sync_client import b
from .async_client import b
# FOR LEGACY COMPATIBILITY, expose "partial_types" as an alias for "stream_types"

View file

@ -76,6 +76,25 @@ class BamlAsyncClient:
def parse_stream(self):
return self.__llm_stream_parser
async def AcreateStructuredOutput(
self,
content: str,
system_prompt: str,
user_prompt: str,
baml_options: BamlCallOptions = {},
) -> types.DynamicModel:
result = await self.__options.merge_options(baml_options).call_function_async(
function_name="AcreateStructuredOutput",
args={
"content": content,
"system_prompt": system_prompt,
"user_prompt": user_prompt,
},
)
return typing.cast(
types.DynamicModel, result.cast_to(types, types, stream_types, False, __runtime__)
)
async def ExtractCategories(
self,
content: str,
@ -184,6 +203,32 @@ class BamlStreamClient:
def __init__(self, options: DoNotUseDirectlyCallManager):
self.__options = options
def AcreateStructuredOutput(
self,
content: str,
system_prompt: str,
user_prompt: str,
baml_options: BamlCallOptions = {},
) -> baml_py.BamlStream[stream_types.DynamicModel, types.DynamicModel]:
ctx, result = self.__options.merge_options(baml_options).create_async_stream(
function_name="AcreateStructuredOutput",
args={
"content": content,
"system_prompt": system_prompt,
"user_prompt": user_prompt,
},
)
return baml_py.BamlStream[stream_types.DynamicModel, types.DynamicModel](
result,
lambda x: typing.cast(
stream_types.DynamicModel, x.cast_to(types, types, stream_types, True, __runtime__)
),
lambda x: typing.cast(
types.DynamicModel, x.cast_to(types, types, stream_types, False, __runtime__)
),
ctx,
)
def ExtractCategories(
self,
content: str,
@ -334,6 +379,24 @@ class BamlHttpRequestClient:
def __init__(self, options: DoNotUseDirectlyCallManager):
self.__options = options
async def AcreateStructuredOutput(
self,
content: str,
system_prompt: str,
user_prompt: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = await self.__options.merge_options(baml_options).create_http_request_async(
function_name="AcreateStructuredOutput",
args={
"content": content,
"system_prompt": system_prompt,
"user_prompt": user_prompt,
},
mode="request",
)
return result
async def ExtractCategories(
self,
content: str,
@ -435,6 +498,24 @@ class BamlHttpStreamRequestClient:
def __init__(self, options: DoNotUseDirectlyCallManager):
self.__options = options
async def AcreateStructuredOutput(
self,
content: str,
system_prompt: str,
user_prompt: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = await self.__options.merge_options(baml_options).create_http_request_async(
function_name="AcreateStructuredOutput",
args={
"content": content,
"system_prompt": system_prompt,
"user_prompt": user_prompt,
},
mode="stream",
)
return result
async def ExtractCategories(
self,
content: str,

File diff suppressed because one or more lines are too long

View file

@ -23,6 +23,16 @@ class LlmResponseParser:
def __init__(self, options: DoNotUseDirectlyCallManager):
self.__options = options
def AcreateStructuredOutput(
self,
llm_response: str,
baml_options: BamlCallOptions = {},
) -> types.DynamicModel:
result = self.__options.merge_options(baml_options).parse_response(
function_name="AcreateStructuredOutput", llm_response=llm_response, mode="request"
)
return typing.cast(types.DynamicModel, result)
def ExtractCategories(
self,
llm_response: str,
@ -80,6 +90,16 @@ class LlmStreamParser:
def __init__(self, options: DoNotUseDirectlyCallManager):
self.__options = options
def AcreateStructuredOutput(
self,
llm_response: str,
baml_options: BamlCallOptions = {},
) -> stream_types.DynamicModel:
result = self.__options.merge_options(baml_options).parse_response(
function_name="AcreateStructuredOutput", llm_response=llm_response, mode="stream"
)
return typing.cast(stream_types.DynamicModel, result)
def ExtractCategories(
self,
llm_response: str,

View file

@ -27,7 +27,7 @@ class StreamState(BaseModel, typing.Generic[StreamStateValueT]):
# #########################################################################
# Generated classes (17)
# Generated classes (18)
# #########################################################################
@ -50,6 +50,11 @@ class DynamicKnowledgeGraph(BaseModel):
model_config = ConfigDict(extra="allow")
class DynamicModel(BaseModel):
model_config = ConfigDict(extra="allow")
test: typing.Optional[str] = None
class Edge(BaseModel):
# doc string for edge
# doc string for source_node_id

View file

@ -89,6 +89,25 @@ class BamlSyncClient:
def parse_stream(self):
return self.__llm_stream_parser
def AcreateStructuredOutput(
self,
content: str,
system_prompt: str,
user_prompt: str,
baml_options: BamlCallOptions = {},
) -> types.DynamicModel:
result = self.__options.merge_options(baml_options).call_function_sync(
function_name="AcreateStructuredOutput",
args={
"content": content,
"system_prompt": system_prompt,
"user_prompt": user_prompt,
},
)
return typing.cast(
types.DynamicModel, result.cast_to(types, types, stream_types, False, __runtime__)
)
def ExtractCategories(
self,
content: str,
@ -197,6 +216,32 @@ class BamlStreamClient:
def __init__(self, options: DoNotUseDirectlyCallManager):
self.__options = options
def AcreateStructuredOutput(
self,
content: str,
system_prompt: str,
user_prompt: str,
baml_options: BamlCallOptions = {},
) -> baml_py.BamlSyncStream[stream_types.DynamicModel, types.DynamicModel]:
ctx, result = self.__options.merge_options(baml_options).create_sync_stream(
function_name="AcreateStructuredOutput",
args={
"content": content,
"system_prompt": system_prompt,
"user_prompt": user_prompt,
},
)
return baml_py.BamlSyncStream[stream_types.DynamicModel, types.DynamicModel](
result,
lambda x: typing.cast(
stream_types.DynamicModel, x.cast_to(types, types, stream_types, True, __runtime__)
),
lambda x: typing.cast(
types.DynamicModel, x.cast_to(types, types, stream_types, False, __runtime__)
),
ctx,
)
def ExtractCategories(
self,
content: str,
@ -351,6 +396,24 @@ class BamlHttpRequestClient:
def __init__(self, options: DoNotUseDirectlyCallManager):
self.__options = options
def AcreateStructuredOutput(
self,
content: str,
system_prompt: str,
user_prompt: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = self.__options.merge_options(baml_options).create_http_request_sync(
function_name="AcreateStructuredOutput",
args={
"content": content,
"system_prompt": system_prompt,
"user_prompt": user_prompt,
},
mode="request",
)
return result
def ExtractCategories(
self,
content: str,
@ -452,6 +515,24 @@ class BamlHttpStreamRequestClient:
def __init__(self, options: DoNotUseDirectlyCallManager):
self.__options = options
def AcreateStructuredOutput(
self,
content: str,
system_prompt: str,
user_prompt: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = self.__options.merge_options(baml_options).create_http_request_sync(
function_name="AcreateStructuredOutput",
args={
"content": content,
"system_prompt": system_prompt,
"user_prompt": user_prompt,
},
mode="stream",
)
return result
def ExtractCategories(
self,
content: str,

View file

@ -25,6 +25,7 @@ class TypeBuilder(type_builder.TypeBuilder):
"ContentLabel",
"DefaultContentPrediction",
"DynamicKnowledgeGraph",
"DynamicModel",
"Edge",
"ImageContent",
"KnowledgeGraph",
@ -49,7 +50,7 @@ class TypeBuilder(type_builder.TypeBuilder):
# #########################################################################
# #########################################################################
# Generated classes 17
# Generated classes 18
# #########################################################################
@property
@ -68,6 +69,10 @@ class TypeBuilder(type_builder.TypeBuilder):
def DynamicKnowledgeGraph(self) -> "DynamicKnowledgeGraphBuilder":
return DynamicKnowledgeGraphBuilder(self)
@property
def DynamicModel(self) -> "DynamicModelBuilder":
return DynamicModelBuilder(self)
@property
def Edge(self) -> "EdgeViewer":
return EdgeViewer(self)
@ -127,7 +132,7 @@ class TypeBuilder(type_builder.TypeBuilder):
# #########################################################################
# Generated classes 17
# Generated classes 18
# #########################################################################
@ -305,6 +310,53 @@ class DynamicKnowledgeGraphProperties:
return self.__bldr.property(name)
class DynamicModelAst:
def __init__(self, tb: type_builder.TypeBuilder):
_tb = tb._tb # type: ignore (we know how to use this private attribute)
self._bldr = _tb.class_("DynamicModel")
self._properties: typing.Set[str] = set(
[
"test",
]
)
self._props = DynamicModelProperties(self._bldr, self._properties)
def type(self) -> baml_py.FieldType:
return self._bldr.field()
@property
def props(self) -> "DynamicModelProperties":
return self._props
class DynamicModelBuilder(DynamicModelAst):
def __init__(self, tb: type_builder.TypeBuilder):
super().__init__(tb)
def add_property(self, name: str, type: baml_py.FieldType) -> baml_py.ClassPropertyBuilder:
if name in self._properties:
raise ValueError(f"Property {name} already exists.")
return self._bldr.property(name).type(type)
def list_properties(self) -> typing.List[typing.Tuple[str, baml_py.ClassPropertyBuilder]]:
return [(name, self._bldr.property(name)) for name in self._properties]
class DynamicModelProperties:
def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]):
self.__bldr = bldr
self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821
def __getattr__(self, name: str) -> baml_py.ClassPropertyBuilder:
if name not in self.__properties:
raise AttributeError(f"Property {name} not found.")
return self.__bldr.property(name)
@property
def test(self) -> baml_py.ClassPropertyBuilder:
return self.__bldr.property("test")
class EdgeAst:
def __init__(self, tb: type_builder.TypeBuilder):
_tb = tb._tb # type: ignore (we know how to use this private attribute)

View file

@ -23,6 +23,8 @@ type_map = {
"stream_types.DefaultContentPrediction": stream_types.DefaultContentPrediction,
"types.DynamicKnowledgeGraph": types.DynamicKnowledgeGraph,
"stream_types.DynamicKnowledgeGraph": stream_types.DynamicKnowledgeGraph,
"types.DynamicModel": types.DynamicModel,
"stream_types.DynamicModel": stream_types.DynamicModel,
"types.Edge": types.Edge,
"stream_types.Edge": stream_types.Edge,
"types.ImageContent": types.ImageContent,

View file

@ -48,7 +48,7 @@ def all_succeeded(checks: typing.Dict[CheckName, Check]) -> bool:
# #########################################################################
# #########################################################################
# Generated classes (17)
# Generated classes (18)
# #########################################################################
@ -79,6 +79,11 @@ class DynamicKnowledgeGraph(BaseModel):
model_config = ConfigDict(extra="allow")
class DynamicModel(BaseModel):
model_config = ConfigDict(extra="allow")
test: str
class Edge(BaseModel):
# doc string for edge
# doc string for source_node_id

View file

@ -0,0 +1,20 @@
class DynamicModel {
test string
@@dynamic
}
function AcreateStructuredOutput(
content: string,
system_prompt: string,
user_prompt: string,
) -> DynamicModel {
client OpenAI
prompt #"
{{ system_prompt }}
{{ ctx.output_format }}
{{ _.role('user') }}
{{ user_prompt }}
{{ content }}
"#
}

View file

@ -1,2 +1,3 @@
from .knowledge_graph.extract_content_graph import extract_content_graph
from .extract_summary import extract_summary, extract_code_summary
from .acreate_structured_output import acreate_structured_output

View file

@ -0,0 +1,46 @@
import os
import asyncio
from typing import Type
from pydantic import BaseModel
from cognee.shared.logging_utils import get_logger
from cognee.shared.data_models import SummarizedCode
from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b
from cognee.infrastructure.llm.config import get_llm_config
logger = get_logger("extract_summary_baml")
async def acreate_structured_output(
content: str, system_prompt: str, user_prompt: str, response_model: Type[BaseModel]
):
"""
Extract summary using BAML framework.
Args:
content: The content to summarize
response_model: The Pydantic model type for the response
Returns:
BaseModel: The summarized content in the specified format
"""
config = get_llm_config()
# Use BAML's SummarizeContent function
result = await b.AcreateStructuredOutput(
content=content,
system_prompt=system_prompt,
user_prompt=user_prompt,
baml_options={"client_registry": config.baml_registry},
)
return result
if __name__ == "__main__":
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
loop.run_until_complete(acreate_structured_output("TEST", SummarizedCode))
finally:
loop.run_until_complete(loop.shutdown_asyncgens())

View file

@ -6,13 +6,13 @@ generator target {
output_type "python/pydantic"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../baml/"
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.201.0"
version "0.206.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode sync
default_client_mode async
}