added baml logic

This commit is contained in:
vasilije 2025-07-06 18:00:05 +02:00
parent 058a73c073
commit ea035a1bce
18 changed files with 667 additions and 21 deletions

View file

@ -9,7 +9,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
class BaseConfig(BaseSettings):
data_root_directory: str = get_absolute_path(".data_storage")
monitoring_tool: object = Observer.LANGFUSE
structured_output_framework: str = os.getenv("STRUCTURED_OUTPUT_FRAMEWORK")
structured_output_framework: str = os.getenv("STRUCTURED_OUTPUT_FRAMEWORK", "")
graphistry_username: Optional[str] = os.getenv("GRAPHISTRY_USERNAME")
graphistry_password: Optional[str] = os.getenv("GRAPHISTRY_PASSWORD")
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")

View file

@ -101,6 +101,20 @@ class BamlAsyncClient:
"content": content,"prompt_override": prompt_override,
})
return typing.cast(types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__))
async def SummarizeCode(self, content: str,
baml_options: BamlCallOptions = {},
) -> types.SummarizedCode:
result = await self.__options.merge_options(baml_options).call_function_async(function_name="SummarizeCode", args={
"content": content,
})
return typing.cast(types.SummarizedCode, result.cast_to(types, types, stream_types, False, __runtime__))
async def SummarizeContent(self, content: str,
baml_options: BamlCallOptions = {},
) -> types.SummarizedContent:
result = await self.__options.merge_options(baml_options).call_function_async(function_name="SummarizeContent", args={
"content": content,
})
return typing.cast(types.SummarizedContent, result.cast_to(types, types, stream_types, False, __runtime__))
@ -158,6 +172,30 @@ class BamlStreamClient:
lambda x: typing.cast(types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)),
ctx,
)
def SummarizeCode(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.BamlStream[stream_types.SummarizedCode, types.SummarizedCode]:
ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="SummarizeCode", args={
"content": content,
})
return baml_py.BamlStream[stream_types.SummarizedCode, types.SummarizedCode](
result,
lambda x: typing.cast(stream_types.SummarizedCode, x.cast_to(types, types, stream_types, True, __runtime__)),
lambda x: typing.cast(types.SummarizedCode, x.cast_to(types, types, stream_types, False, __runtime__)),
ctx,
)
def SummarizeContent(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.BamlStream[stream_types.SummarizedContent, types.SummarizedContent]:
ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="SummarizeContent", args={
"content": content,
})
return baml_py.BamlStream[stream_types.SummarizedContent, types.SummarizedContent](
result,
lambda x: typing.cast(stream_types.SummarizedContent, x.cast_to(types, types, stream_types, True, __runtime__)),
lambda x: typing.cast(types.SummarizedContent, x.cast_to(types, types, stream_types, False, __runtime__)),
ctx,
)
class BamlHttpRequestClient:
@ -194,6 +232,20 @@ class BamlHttpRequestClient:
"content": content,"prompt_override": prompt_override,
}, mode="request")
return result
async def SummarizeCode(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SummarizeCode", args={
"content": content,
}, mode="request")
return result
async def SummarizeContent(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SummarizeContent", args={
"content": content,
}, mode="request")
return result
class BamlHttpStreamRequestClient:
@ -230,6 +282,20 @@ class BamlHttpStreamRequestClient:
"content": content,"prompt_override": prompt_override,
}, mode="stream")
return result
async def SummarizeCode(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SummarizeCode", args={
"content": content,
}, mode="stream")
return result
async def SummarizeContent(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SummarizeContent", args={
"content": content,
}, mode="stream")
return result
b = BamlAsyncClient(DoNotUseDirectlyCallManager({}))

File diff suppressed because one or more lines are too long

View file

@ -46,6 +46,18 @@ class LlmResponseParser:
result = self.__options.merge_options(baml_options).parse_response(function_name="ExtractContentGraphWithEnvPrompt", llm_response=llm_response, mode="request")
return typing.cast(types.KnowledgeGraph, result)
def SummarizeCode(
self, llm_response: str, baml_options: BamlCallOptions = {},
) -> types.SummarizedCode:
result = self.__options.merge_options(baml_options).parse_response(function_name="SummarizeCode", llm_response=llm_response, mode="request")
return typing.cast(types.SummarizedCode, result)
def SummarizeContent(
self, llm_response: str, baml_options: BamlCallOptions = {},
) -> types.SummarizedContent:
result = self.__options.merge_options(baml_options).parse_response(function_name="SummarizeContent", llm_response=llm_response, mode="request")
return typing.cast(types.SummarizedContent, result)
class LlmStreamParser:
@ -78,4 +90,16 @@ class LlmStreamParser:
result = self.__options.merge_options(baml_options).parse_response(function_name="ExtractContentGraphWithEnvPrompt", llm_response=llm_response, mode="stream")
return typing.cast(stream_types.KnowledgeGraph, result)
def SummarizeCode(
self, llm_response: str, baml_options: BamlCallOptions = {},
) -> stream_types.SummarizedCode:
result = self.__options.merge_options(baml_options).parse_response(function_name="SummarizeCode", llm_response=llm_response, mode="stream")
return typing.cast(stream_types.SummarizedCode, result)
def SummarizeContent(
self, llm_response: str, baml_options: BamlCallOptions = {},
) -> stream_types.SummarizedContent:
result = self.__options.merge_options(baml_options).parse_response(function_name="SummarizeContent", llm_response=llm_response, mode="stream")
return typing.cast(stream_types.SummarizedContent, result)

View file

@ -23,7 +23,7 @@ class StreamState(BaseModel, typing.Generic[StreamStateValueT]):
value: StreamStateValueT
state: typing_extensions.Literal["Pending", "Incomplete", "Complete"]
# #########################################################################
# Generated classes (3)
# Generated classes (7)
# #########################################################################
class Edge(BaseModel):
@ -45,6 +45,32 @@ class Node(BaseModel):
type: typing.Optional[str] = None
description: typing.Optional[str] = None
class SummarizedClass(BaseModel):
name: typing.Optional[str] = None
description: typing.Optional[str] = None
methods: typing.Optional[typing.List["SummarizedFunction"]] = None
decorators: typing.Optional[typing.List[str]] = None
class SummarizedCode(BaseModel):
high_level_summary: typing.Optional[str] = None
key_features: typing.List[str]
imports: typing.List[str]
constants: typing.List[str]
classes: typing.List["SummarizedClass"]
functions: typing.List["SummarizedFunction"]
workflow_description: typing.Optional[str] = None
class SummarizedContent(BaseModel):
summary: typing.Optional[str] = None
description: typing.Optional[str] = None
class SummarizedFunction(BaseModel):
name: typing.Optional[str] = None
description: typing.Optional[str] = None
inputs: typing.Optional[typing.List[str]] = None
outputs: typing.Optional[typing.List[str]] = None
decorators: typing.Optional[typing.List[str]] = None
# #########################################################################
# Generated type aliases (0)
# #########################################################################

View file

@ -113,6 +113,20 @@ class BamlSyncClient:
"content": content,"prompt_override": prompt_override,
})
return typing.cast(types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__))
def SummarizeCode(self, content: str,
baml_options: BamlCallOptions = {},
) -> types.SummarizedCode:
result = self.__options.merge_options(baml_options).call_function_sync(function_name="SummarizeCode", args={
"content": content,
})
return typing.cast(types.SummarizedCode, result.cast_to(types, types, stream_types, False, __runtime__))
def SummarizeContent(self, content: str,
baml_options: BamlCallOptions = {},
) -> types.SummarizedContent:
result = self.__options.merge_options(baml_options).call_function_sync(function_name="SummarizeContent", args={
"content": content,
})
return typing.cast(types.SummarizedContent, result.cast_to(types, types, stream_types, False, __runtime__))
@ -170,6 +184,30 @@ class BamlStreamClient:
lambda x: typing.cast(types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)),
ctx,
)
def SummarizeCode(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.BamlSyncStream[stream_types.SummarizedCode, types.SummarizedCode]:
ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="SummarizeCode", args={
"content": content,
})
return baml_py.BamlSyncStream[stream_types.SummarizedCode, types.SummarizedCode](
result,
lambda x: typing.cast(stream_types.SummarizedCode, x.cast_to(types, types, stream_types, True, __runtime__)),
lambda x: typing.cast(types.SummarizedCode, x.cast_to(types, types, stream_types, False, __runtime__)),
ctx,
)
def SummarizeContent(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.BamlSyncStream[stream_types.SummarizedContent, types.SummarizedContent]:
ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="SummarizeContent", args={
"content": content,
})
return baml_py.BamlSyncStream[stream_types.SummarizedContent, types.SummarizedContent](
result,
lambda x: typing.cast(stream_types.SummarizedContent, x.cast_to(types, types, stream_types, True, __runtime__)),
lambda x: typing.cast(types.SummarizedContent, x.cast_to(types, types, stream_types, False, __runtime__)),
ctx,
)
class BamlHttpRequestClient:
@ -206,6 +244,20 @@ class BamlHttpRequestClient:
"content": content,"prompt_override": prompt_override,
}, mode="request")
return result
def SummarizeCode(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SummarizeCode", args={
"content": content,
}, mode="request")
return result
def SummarizeContent(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SummarizeContent", args={
"content": content,
}, mode="request")
return result
class BamlHttpStreamRequestClient:
@ -242,6 +294,20 @@ class BamlHttpStreamRequestClient:
"content": content,"prompt_override": prompt_override,
}, mode="stream")
return result
def SummarizeCode(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SummarizeCode", args={
"content": content,
}, mode="stream")
return result
def SummarizeContent(self, content: str,
baml_options: BamlCallOptions = {},
) -> baml_py.baml_py.HTTPRequest:
result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SummarizeContent", args={
"content": content,
}, mode="stream")
return result
b = BamlSyncClient(DoNotUseDirectlyCallManager({}))

View file

@ -18,7 +18,7 @@ from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIM
class TypeBuilder(type_builder.TypeBuilder):
def __init__(self):
super().__init__(classes=set(
["Edge","KnowledgeGraph","Node",]
["Edge","KnowledgeGraph","Node","SummarizedClass","SummarizedCode","SummarizedContent","SummarizedFunction",]
), enums=set(
[]
), runtime=DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME)
@ -29,7 +29,7 @@ class TypeBuilder(type_builder.TypeBuilder):
# #########################################################################
# Generated classes 3
# Generated classes 7
# #########################################################################
@property
@ -44,6 +44,22 @@ class TypeBuilder(type_builder.TypeBuilder):
def Node(self) -> "NodeBuilder":
return NodeBuilder(self)
@property
def SummarizedClass(self) -> "SummarizedClassViewer":
return SummarizedClassViewer(self)
@property
def SummarizedCode(self) -> "SummarizedCodeViewer":
return SummarizedCodeViewer(self)
@property
def SummarizedContent(self) -> "SummarizedContentViewer":
return SummarizedContentViewer(self)
@property
def SummarizedFunction(self) -> "SummarizedFunctionViewer":
return SummarizedFunctionViewer(self)
# #########################################################################
@ -52,7 +68,7 @@ class TypeBuilder(type_builder.TypeBuilder):
# #########################################################################
# Generated classes 3
# Generated classes 7
# #########################################################################
class EdgeAst:
@ -206,3 +222,215 @@ class NodeProperties:
class SummarizedClassAst:
def __init__(self, tb: type_builder.TypeBuilder):
_tb = tb._tb # type: ignore (we know how to use this private attribute)
self._bldr = _tb.class_("SummarizedClass")
self._properties: typing.Set[str] = set([ "name", "description", "methods", "decorators", ])
self._props = SummarizedClassProperties(self._bldr, self._properties)
def type(self) -> baml_py.FieldType:
return self._bldr.field()
@property
def props(self) -> "SummarizedClassProperties":
return self._props
class SummarizedClassViewer(SummarizedClassAst):
def __init__(self, tb: type_builder.TypeBuilder):
super().__init__(tb)
def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]:
return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties]
class SummarizedClassProperties:
def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]):
self.__bldr = bldr
self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821
@property
def name(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("name"))
@property
def description(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("description"))
@property
def methods(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("methods"))
@property
def decorators(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("decorators"))
class SummarizedCodeAst:
def __init__(self, tb: type_builder.TypeBuilder):
_tb = tb._tb # type: ignore (we know how to use this private attribute)
self._bldr = _tb.class_("SummarizedCode")
self._properties: typing.Set[str] = set([ "high_level_summary", "key_features", "imports", "constants", "classes", "functions", "workflow_description", ])
self._props = SummarizedCodeProperties(self._bldr, self._properties)
def type(self) -> baml_py.FieldType:
return self._bldr.field()
@property
def props(self) -> "SummarizedCodeProperties":
return self._props
class SummarizedCodeViewer(SummarizedCodeAst):
def __init__(self, tb: type_builder.TypeBuilder):
super().__init__(tb)
def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]:
return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties]
class SummarizedCodeProperties:
def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]):
self.__bldr = bldr
self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821
@property
def high_level_summary(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("high_level_summary"))
@property
def key_features(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("key_features"))
@property
def imports(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("imports"))
@property
def constants(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("constants"))
@property
def classes(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("classes"))
@property
def functions(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("functions"))
@property
def workflow_description(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("workflow_description"))
class SummarizedContentAst:
def __init__(self, tb: type_builder.TypeBuilder):
_tb = tb._tb # type: ignore (we know how to use this private attribute)
self._bldr = _tb.class_("SummarizedContent")
self._properties: typing.Set[str] = set([ "summary", "description", ])
self._props = SummarizedContentProperties(self._bldr, self._properties)
def type(self) -> baml_py.FieldType:
return self._bldr.field()
@property
def props(self) -> "SummarizedContentProperties":
return self._props
class SummarizedContentViewer(SummarizedContentAst):
def __init__(self, tb: type_builder.TypeBuilder):
super().__init__(tb)
def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]:
return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties]
class SummarizedContentProperties:
def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]):
self.__bldr = bldr
self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821
@property
def summary(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("summary"))
@property
def description(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("description"))
class SummarizedFunctionAst:
def __init__(self, tb: type_builder.TypeBuilder):
_tb = tb._tb # type: ignore (we know how to use this private attribute)
self._bldr = _tb.class_("SummarizedFunction")
self._properties: typing.Set[str] = set([ "name", "description", "inputs", "outputs", "decorators", ])
self._props = SummarizedFunctionProperties(self._bldr, self._properties)
def type(self) -> baml_py.FieldType:
return self._bldr.field()
@property
def props(self) -> "SummarizedFunctionProperties":
return self._props
class SummarizedFunctionViewer(SummarizedFunctionAst):
def __init__(self, tb: type_builder.TypeBuilder):
super().__init__(tb)
def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]:
return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties]
class SummarizedFunctionProperties:
def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]):
self.__bldr = bldr
self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821
@property
def name(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("name"))
@property
def description(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("description"))
@property
def inputs(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("inputs"))
@property
def outputs(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("outputs"))
@property
def decorators(self) -> type_builder.ClassPropertyViewer:
return type_builder.ClassPropertyViewer(self.__bldr.property("decorators"))

View file

@ -25,5 +25,17 @@ type_map = {
"types.Node": types.Node,
"stream_types.Node": stream_types.Node,
"types.SummarizedClass": types.SummarizedClass,
"stream_types.SummarizedClass": stream_types.SummarizedClass,
"types.SummarizedCode": types.SummarizedCode,
"stream_types.SummarizedCode": stream_types.SummarizedCode,
"types.SummarizedContent": types.SummarizedContent,
"stream_types.SummarizedContent": stream_types.SummarizedContent,
"types.SummarizedFunction": types.SummarizedFunction,
"stream_types.SummarizedFunction": stream_types.SummarizedFunction,
}

View file

@ -41,7 +41,7 @@ def all_succeeded(checks: typing.Dict[CheckName, Check]) -> bool:
# #########################################################################
# #########################################################################
# Generated classes (3)
# Generated classes (7)
# #########################################################################
class Edge(BaseModel):
@ -63,6 +63,32 @@ class Node(BaseModel):
type: str
description: str
class SummarizedClass(BaseModel):
name: str
description: str
methods: typing.Optional[typing.List["SummarizedFunction"]] = None
decorators: typing.Optional[typing.List[str]] = None
class SummarizedCode(BaseModel):
high_level_summary: str
key_features: typing.List[str]
imports: typing.List[str]
constants: typing.List[str]
classes: typing.List["SummarizedClass"]
functions: typing.List["SummarizedFunction"]
workflow_description: typing.Optional[str] = None
class SummarizedContent(BaseModel):
summary: str
description: str
class SummarizedFunction(BaseModel):
name: str
description: str
inputs: typing.Optional[typing.List[str]] = None
outputs: typing.Optional[typing.List[str]] = None
decorators: typing.Optional[typing.List[str]] = None
# #########################################################################
# Generated type aliases (0)
# #########################################################################

View file

@ -1,5 +1,5 @@
import os
from typing import Optional
from typing import Optional, ClassVar
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import model_validator
@ -48,18 +48,19 @@ class LLMConfig(BaseSettings):
embedding_rate_limit_enabled: bool = False
embedding_rate_limit_requests: int = 60
embedding_rate_limit_interval: int = 60 # in seconds (default is 60 requests per minute)
baml_registry = ClientRegistry()
baml_registry: ClassVar[ClientRegistry] = ClientRegistry()
model_config = SettingsConfigDict(env_file=".env", extra="allow")
baml_registry.add_llm_client(name=llm_provider, provider=llm_provider, options={
"model": llm_model,
"temperature": llm_temperature,
"api_key": llm_api_key
})
# Sets MyAmazingClient as the primary client
baml_registry.set_primary('openai')
def model_post_init(self, __context) -> None:
"""Initialize the BAML registry after the model is created."""
self.baml_registry.add_llm_client(name=self.llm_provider, provider=self.llm_provider, options={
"model": self.llm_model,
"temperature": self.llm_temperature,
"api_key": self.llm_api_key
})
# Sets the primary client
self.baml_registry.set_primary(self.llm_provider)
@model_validator(mode="after")
def ensure_env_vars_for_ollama(self) -> "LLMConfig":

View file

@ -19,6 +19,37 @@ class KnowledgeGraph {
edges Edge[]
}
// Summarization classes
// Output schema of the SummarizeContent function.
// Both fields are required strings.
class SummarizedContent {
summary string
description string
}
// Summary of a single function. Used as the element type of
// SummarizedCode.functions and SummarizedClass.methods.
// name and description are required; inputs, outputs and decorators are
// optional string lists (`string[]?`) and may be omitted.
class SummarizedFunction {
name string
description string
inputs string[]?
outputs string[]?
decorators string[]?
}
// Summary of a single class. Used as the element type of SummarizedCode.classes.
// name and description are required; methods (a list of SummarizedFunction)
// and decorators are optional.
class SummarizedClass {
name string
description string
methods SummarizedFunction[]?
decorators string[]?
}
// Output schema of the SummarizeCode function.
// Every field is required except workflow_description (`string?`).
// The list fields are non-optional lists.
class SummarizedCode {
high_level_summary string
key_features string[]
imports string[]
constants string[]
classes SummarizedClass[]
functions SummarizedFunction[]
workflow_description string?
}
// Simple template for basic extraction (fast, good quality)
template_string ExtractContentGraphPrompt() #"
You are an advanced algorithm that extracts structured data into a knowledge graph.
@ -50,6 +81,28 @@ template_string ExtractContentGraphPrompt() #"
- Follow these rules exactly. Non-compliance results in termination.
"#
// Summarization prompt template
template_string SummarizeContentPrompt() #"
You are a top-tier summarization engine. Your task is to summarize text and make it versatile.
Be brief and concise, but keep the important information and the subject.
Use synonym words where possible in order to change the wording but keep the meaning.
"#
// Code summarization prompt template
template_string SummarizeCodePrompt() #"
You are an expert code analyst. Analyze the provided source code and extract key information:
1. Provide a high-level summary of what the code does
2. List key features and functionality
3. Identify imports and dependencies
4. List constants and global variables
5. Summarize classes with their methods
6. Summarize standalone functions
7. Describe the overall workflow if applicable
Be precise and technical while remaining clear and concise.
"#
// Detailed template for complex extraction (slower, higher quality)
template_string DetailedExtractContentGraphPrompt() #"
You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
@ -325,6 +378,33 @@ function ExtractContentGraphWithAnthropic(
"#
}
// Summarization functions
// Summarizes free-form text and returns a SummarizedContent.
// Prompt layout: the SummarizeContentPrompt() instructions and the generated
// output schema come first; the caller's `content` follows after the
// user-role marker.
// The declared client is OpenAIClientWithEnvModel.
function SummarizeContent(content: string) -> SummarizedContent {
client OpenAIClientWithEnvModel
prompt #"
{{ SummarizeContentPrompt() }}
{{ ctx.output_format(prefix="Answer in this schema:\n") }}
{{ _.role('user') }}
{{ content }}
"#
}
// Summarizes source code and returns a SummarizedCode.
// Prompt layout: the SummarizeCodePrompt() instructions and the generated
// output schema come first; the caller's `content` (the code to analyze)
// follows after the user-role marker.
// The declared client is OpenAIClientWithEnvModel.
function SummarizeCode(content: string) -> SummarizedCode {
client OpenAIClientWithEnvModel
prompt #"
{{ SummarizeCodePrompt() }}
{{ ctx.output_format(prefix="Answer in this schema:\n") }}
{{ _.role('user') }}
{{ content }}
"#
}
test ExtractPersonExample {
functions [ExtractContentGraph]
args {
@ -365,3 +445,31 @@ test ExtractGenericExample {
mode "simple"
}
}
test SummarizeContentExample {
functions [SummarizeContent]
args {
content #"
Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval.
It deals with the interaction between computers and human language, in particular how to program computers to process and analyze large amounts of natural language data.
"#
}
}
test SummarizeCodeExample {
functions [SummarizeCode]
args {
content #"
def fibonacci(n):
if n <= 1:
return n
return fibonacci(n-1) + fibonacci(n-2)
def main():
print(fibonacci(10))
if __name__ == "__main__":
main()
"#
}
}

View file

@ -1 +1,2 @@
from .knowledge_graph.extract_content_graph import extract_content_graph
from .extract_summary import extract_summary, extract_code_summary

View file

@ -0,0 +1,67 @@
import os
from typing import Type
from pydantic import BaseModel
from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b
from cognee.infrastructure.llm.structured_output_framework.baml_src.config import get_llm_config
from cognee.shared.data_models import SummarizedCode
from cognee.tasks.summarization.mock_summary import get_mock_summarized_code
from cognee.shared.logging_utils import get_logger
from instructor.exceptions import InstructorRetryException
logger = get_logger("extract_summary_baml")
async def extract_summary(content: str, response_model: Type[BaseModel]):
    """
    Summarize ``content`` with the BAML client.

    ``response_model`` only selects which BAML function runs:
    ``SummarizedCode`` routes to ``b.SummarizeCode``; every other model routes
    to ``b.SummarizeContent``. The value returned is whatever that BAML call
    returns; it is not converted into an instance of ``response_model``.

    Args:
        content: The text (or source code) to summarize.
        response_model: The Pydantic model type the caller expects.

    Returns:
        The result of ``b.SummarizeCode`` when ``response_model`` is
        ``SummarizedCode``, otherwise the result of ``b.SummarizeContent``.
    """
    config = get_llm_config()

    # The registry must travel under "client_registry"; the "tb" option is the
    # TypeBuilder slot and is not meant to carry a ClientRegistry.
    baml_options = {"client_registry": config.baml_registry}

    # Choose the BAML function up front so exactly one LLM request is issued
    # (previously SummarizeContent was always called and its result discarded
    # whenever a code summary was requested).
    if response_model is SummarizedCode:
        return await b.SummarizeCode(content, baml_options=baml_options)

    return await b.SummarizeContent(content, baml_options=baml_options)
async def extract_code_summary(content: str):
    """
    Summarize source code with the BAML client, with mocking support.

    When the ``MOCK_CODE_SUMMARY`` environment variable is set to ``true``,
    ``1`` or ``yes`` (case-insensitive, surrounding whitespace ignored), the
    LLM is not called and the mock summary is returned. If the BAML call
    fails for any reason, the error is logged and the mock summary is
    returned instead of raising (deliberate best-effort behavior).

    Args:
        content: The code content to summarize.

    Returns:
        The result of ``b.SummarizeCode``, or the value of
        ``get_mock_summarized_code()`` when mocking is enabled or the call fails.
    """
    # os.getenv with a string default always yields a str, so normalise it
    # once; this also accepts spellings such as "True" or "YES".
    mock_flag = os.getenv("MOCK_CODE_SUMMARY", "false").strip().lower()
    if mock_flag in ("true", "1", "yes"):
        return get_mock_summarized_code()

    try:
        config = get_llm_config()
        # The registry must travel under "client_registry"; the "tb" option is
        # the TypeBuilder slot and is not meant to carry a ClientRegistry.
        return await b.SummarizeCode(
            content, baml_options={"client_registry": config.baml_registry}
        )
    except Exception as e:
        # Best-effort: never fail the pipeline on a summarization error.
        logger.error("Failed to extract code summary with BAML, falling back to mock summary", exc_info=e)
        return get_mock_summarized_code()

View file

@ -1,8 +1,8 @@
import os
from typing import Type
from pydantic import BaseModel
from cognee.infrastructure.llm.structured_output_framework.baml.async_client import b
from cognee.infrastructure.llm.structured_output_framework.baml.type_builder import TypeBuilder
from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b
from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.type_builder import TypeBuilder
from cognee.infrastructure.llm.structured_output_framework.baml_src.config import get_llm_config
from cognee.shared.logging_utils import get_logger, setup_logging

View file

@ -9,8 +9,10 @@ from cognee.base_config import get_base_config
# Choose the extract_content_graph implementation once, when this module is
# imported. Only the exact (case-sensitive) value 'BAML' selects the BAML
# backend; any other structured_output_framework value falls through to the
# llitellm_instructor backend.
# NOTE(review): the print() calls execute at import time — consider routing
# them through the project logger.
base = get_base_config()
if base.structured_output_framework == 'BAML':
print(f"Using BAML framework: {base.structured_output_framework}")
from cognee.infrastructure.llm.structured_output_framework.baml_src.extraction import extract_content_graph
else:
print(f"Using llitellm_instructor framework: {base.structured_output_framework}")
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.extraction import extract_content_graph
async def extract_graph_from_code(

View file

@ -9,8 +9,10 @@ from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.base_config import get_base_config
# Choose the extract_content_graph implementation once, when this module is
# imported. Only the exact (case-sensitive) value 'BAML' selects the BAML
# backend; any other structured_output_framework value falls through to the
# llitellm_instructor backend.
# NOTE(review): the print() calls execute at import time — consider routing
# them through the project logger.
base = get_base_config()
if base.structured_output_framework == 'BAML':
print(f"Using BAML framework: {base.structured_output_framework}")
from cognee.infrastructure.llm.structured_output_framework.baml_src.extraction import extract_content_graph
else:
print(f"Using llitellm_instructor framework: {base.structured_output_framework}")
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.extraction import extract_content_graph
from cognee.modules.graph.utils import (

View file

@ -3,7 +3,15 @@ from typing import AsyncGenerator, Union
from uuid import uuid5
from cognee.infrastructure.engine import DataPoint
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.extraction import extract_code_summary
from cognee.base_config import get_base_config
# Choose the extract_code_summary implementation once, when this module is
# imported. Only the exact (case-sensitive) value 'BAML' selects the BAML
# backend; any other structured_output_framework value falls through to the
# llitellm_instructor backend.
# NOTE(review): the print() calls execute at import time — consider routing
# them through the project logger.
base = get_base_config()
if base.structured_output_framework == 'BAML':
print(f"Using BAML framework for code summarization: {base.structured_output_framework}")
from cognee.infrastructure.llm.structured_output_framework.baml_src.extraction import extract_code_summary
else:
print(f"Using llitellm_instructor framework for code summarization: {base.structured_output_framework}")
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.extraction import extract_code_summary
from .models import CodeSummary

View file

@ -2,7 +2,16 @@ import asyncio
from typing import Type
from uuid import uuid5
from pydantic import BaseModel
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.extraction import extract_summary
from cognee.base_config import get_base_config
# Choose the extract_summary implementation once, when this module is
# imported. Only the exact (case-sensitive) value 'BAML' selects the BAML
# backend; any other structured_output_framework value falls through to the
# llitellm_instructor backend.
# NOTE(review): the print() calls execute at import time — consider routing
# them through the project logger.
base = get_base_config()
if base.structured_output_framework == 'BAML':
print(f"Using BAML framework for text summarization: {base.structured_output_framework}")
from cognee.infrastructure.llm.structured_output_framework.baml_src.extraction import extract_summary
else:
print(f"Using llitellm_instructor framework for text summarization: {base.structured_output_framework}")
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.extraction import extract_summary
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.cognify.config import get_cognify_config
from .models import TextSummary