diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py index 85dfab90c..c4fe2bc85 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py @@ -13,9 +13,9 @@ __version__ = "0.201.0" try: - from baml_py.safe_import import EnsureBamlPyImport + from baml_py.safe_import import EnsureBamlPyImport except ImportError: - raise ImportError(f"""Update to baml-py required. + raise ImportError(f"""Update to baml-py required. Version of baml_client generator (see generators.baml): {__version__} Please upgrade baml-py to version "{__version__}". @@ -31,15 +31,16 @@ https://boundaryml.com/discord with EnsureBamlPyImport(__version__) as e: - e.raise_if_incompatible_version(__version__) + e.raise_if_incompatible_version(__version__) - from . import types - from . import tracing - from . import stream_types - from . import config - from .config import reset_baml_env_vars - - from .sync_client import b + from . import types + from . import tracing + from . import stream_types + from . import config + from .config import reset_baml_env_vars + + from .sync_client import b + # FOR LEGACY COMPATIBILITY, expose "partial_types" as an alias for "stream_types" @@ -47,11 +48,11 @@ with EnsureBamlPyImport(__version__) as e: partial_types = stream_types __all__ = [ - "b", - "stream_types", - "partial_types", - "tracing", - "types", - "reset_baml_env_vars", - "config", -] + "b", + "stream_types", + "partial_types", + "tracing", + "types", + "reset_baml_env_vars", + "config", +] \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py index 17c57f261..18b24abdd 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py @@ -36,13 +36,10 @@ class BamlAsyncClient: self.__llm_response_parser = LlmResponseParser(options) self.__llm_stream_parser = LlmStreamParser(options) - def with_options( - self, + def with_options(self, tb: typing.Optional[type_builder.TypeBuilder] = None, client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None, - collector: typing.Optional[ - typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]] - ] = None, + collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None, env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None, ) -> "BamlAsyncClient": options: BamlCallOptions = {} @@ -58,151 +55,60 @@ class BamlAsyncClient: @property def stream(self): - return self.__stream_client + return self.__stream_client @property def request(self): - return self.__http_request + return self.__http_request @property def stream_request(self): - return self.__http_stream_request + return self.__http_stream_request @property def parse(self): - return self.__llm_response_parser + return self.__llm_response_parser @property def parse_stream(self): - return self.__llm_stream_parser - - async def ExtractContentGraph( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + return self.__llm_stream_parser + + async def ExtractCategories(self, content: str, + baml_options: BamlCallOptions = {}, + ) -> types.DefaultContentPrediction: + result = await self.__options.merge_options(baml_options).call_function_async(function_name="ExtractCategories", args={ + "content": content, + }) + return typing.cast(types.DefaultContentPrediction, result.cast_to(types, types, stream_types, False, __runtime__)) + async def ExtractContentGraphGeneric(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> types.KnowledgeGraph: - result = await self.__options.merge_options(baml_options).call_function_async( - function_name="ExtractContentGraph", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) - return typing.cast( - types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - async def ExtractContentGraphGeneric( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + result = await self.__options.merge_options(baml_options).call_function_async(function_name="ExtractContentGraphGeneric", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }) + return typing.cast(types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__)) + async def ExtractDynamicContentGraph(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, - ) -> types.KnowledgeGraph: - result = await self.__options.merge_options(baml_options).call_function_async( - function_name="ExtractContentGraphGeneric", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) - return typing.cast( - types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - async def ExtractContentGraphWithAnthropic( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, - baml_options: BamlCallOptions = {}, - ) -> types.KnowledgeGraph: - result = await self.__options.merge_options(baml_options).call_function_async( - function_name="ExtractContentGraphWithAnthropic", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) - return typing.cast( - types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - async def ExtractContentGraphWithEnvPrompt( - self, - content: str, - prompt_override: typing.Optional[str] = None, - baml_options: BamlCallOptions = {}, - ) -> types.KnowledgeGraph: - result = await self.__options.merge_options(baml_options).call_function_async( - function_name="ExtractContentGraphWithEnvPrompt", - args={ - "content": content, - "prompt_override": prompt_override, - }, - ) - return typing.cast( - types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - async def SummarizeCode( - self, - content: str, + ) -> types.DynamicKnowledgeGraph: + result = await self.__options.merge_options(baml_options).call_function_async(function_name="ExtractDynamicContentGraph", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }) + return typing.cast(types.DynamicKnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__)) + async def SummarizeCode(self, content: str, baml_options: BamlCallOptions = {}, ) -> types.SummarizedCode: - result = await self.__options.merge_options(baml_options).call_function_async( - function_name="SummarizeCode", - args={ - "content": content, - }, - ) - return typing.cast( - types.SummarizedCode, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - async def SummarizeContent( - self, - content: str, + result = await self.__options.merge_options(baml_options).call_function_async(function_name="SummarizeCode", args={ + "content": content, + }) + return typing.cast(types.SummarizedCode, result.cast_to(types, types, stream_types, False, __runtime__)) + async def SummarizeContent(self, content: str, baml_options: BamlCallOptions = {}, ) -> types.SummarizedContent: - result = await self.__options.merge_options(baml_options).call_function_async( - function_name="SummarizeContent", - args={ - "content": content, - }, - ) - return typing.cast( - types.SummarizedContent, result.cast_to(types, types, stream_types, False, __runtime__) - ) + result = await self.__options.merge_options(baml_options).call_function_async(function_name="SummarizeContent", args={ + "content": content, + }) + return typing.cast(types.SummarizedContent, result.cast_to(types, types, stream_types, False, __runtime__)) + class BamlStreamClient: @@ -211,182 +117,67 @@ class BamlStreamClient: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def ExtractContentGraph( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractCategories(self, content: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlStream[stream_types.DefaultContentPrediction, types.DefaultContentPrediction]: + ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="ExtractCategories", args={ + "content": content, + }) + return baml_py.BamlStream[stream_types.DefaultContentPrediction, types.DefaultContentPrediction]( + result, + lambda x: typing.cast(stream_types.DefaultContentPrediction, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.DefaultContentPrediction, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) + def ExtractContentGraphGeneric(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]: - ctx, result = self.__options.merge_options(baml_options).create_async_stream( - function_name="ExtractContentGraph", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) + ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="ExtractContentGraphGeneric", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }) return baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]( - result, - lambda x: typing.cast( - stream_types.KnowledgeGraph, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, + result, + lambda x: typing.cast(stream_types.KnowledgeGraph, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, ) - - def ExtractContentGraphGeneric( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractDynamicContentGraph(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]: - ctx, result = self.__options.merge_options(baml_options).create_async_stream( - function_name="ExtractContentGraphGeneric", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, + ) -> baml_py.BamlStream[stream_types.DynamicKnowledgeGraph, types.DynamicKnowledgeGraph]: + ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="ExtractDynamicContentGraph", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }) + return baml_py.BamlStream[stream_types.DynamicKnowledgeGraph, types.DynamicKnowledgeGraph]( + result, + lambda x: typing.cast(stream_types.DynamicKnowledgeGraph, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.DynamicKnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, ) - return baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]( - result, - lambda x: typing.cast( - stream_types.KnowledgeGraph, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, - ) - - def ExtractContentGraphWithAnthropic( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, - baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]: - ctx, result = self.__options.merge_options(baml_options).create_async_stream( - function_name="ExtractContentGraphWithAnthropic", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) - return baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]( - result, - lambda x: typing.cast( - stream_types.KnowledgeGraph, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, - ) - - def ExtractContentGraphWithEnvPrompt( - self, - content: str, - prompt_override: typing.Optional[str] = None, - baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]: - ctx, result = self.__options.merge_options(baml_options).create_async_stream( - function_name="ExtractContentGraphWithEnvPrompt", - args={ - "content": content, - "prompt_override": prompt_override, - }, - ) - return baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]( - result, - lambda x: typing.cast( - stream_types.KnowledgeGraph, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, - ) - - def SummarizeCode( - self, - content: str, + def SummarizeCode(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlStream[stream_types.SummarizedCode, types.SummarizedCode]: - ctx, result = self.__options.merge_options(baml_options).create_async_stream( - function_name="SummarizeCode", - args={ - "content": content, - }, - ) + ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="SummarizeCode", args={ + "content": content, + }) return baml_py.BamlStream[stream_types.SummarizedCode, types.SummarizedCode]( - result, - lambda x: typing.cast( - stream_types.SummarizedCode, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.SummarizedCode, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, + result, + lambda x: typing.cast(stream_types.SummarizedCode, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.SummarizedCode, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, ) - - def SummarizeContent( - self, - content: str, + def SummarizeContent(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlStream[stream_types.SummarizedContent, types.SummarizedContent]: - ctx, result = self.__options.merge_options(baml_options).create_async_stream( - function_name="SummarizeContent", - args={ - "content": content, - }, - ) + ctx, result = self.__options.merge_options(baml_options).create_async_stream(function_name="SummarizeContent", args={ + "content": content, + }) return baml_py.BamlStream[stream_types.SummarizedContent, types.SummarizedContent]( - result, - lambda x: typing.cast( - stream_types.SummarizedContent, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.SummarizedContent, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, + result, + lambda x: typing.cast(stream_types.SummarizedContent, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.SummarizedContent, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, ) - + class BamlHttpRequestClient: __options: DoNotUseDirectlyCallManager @@ -394,128 +185,42 @@ class BamlHttpRequestClient: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - async def ExtractContentGraph( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + async def ExtractCategories(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="ExtractContentGraph", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="request", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ExtractCategories", args={ + "content": content, + }, mode="request") return result - - async def ExtractContentGraphGeneric( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + async def ExtractContentGraphGeneric(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="ExtractContentGraphGeneric", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="request", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ExtractContentGraphGeneric", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }, mode="request") return result - - async def ExtractContentGraphWithAnthropic( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + async def ExtractDynamicContentGraph(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="ExtractContentGraphWithAnthropic", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="request", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ExtractDynamicContentGraph", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }, mode="request") return result - - async def ExtractContentGraphWithEnvPrompt( - self, - content: str, - prompt_override: typing.Optional[str] = None, + async def SummarizeCode(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="ExtractContentGraphWithEnvPrompt", - args={ - "content": content, - "prompt_override": prompt_override, - }, - mode="request", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SummarizeCode", args={ + "content": content, + }, mode="request") return result - - async def SummarizeCode( - self, - content: str, + async def SummarizeContent(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="SummarizeCode", - args={ - "content": content, - }, - mode="request", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SummarizeContent", args={ + "content": content, + }, mode="request") return result - - async def SummarizeContent( - self, - content: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="SummarizeContent", - args={ - "content": content, - }, - mode="request", - ) - return result - + class BamlHttpStreamRequestClient: __options: DoNotUseDirectlyCallManager @@ -523,127 +228,41 @@ class BamlHttpStreamRequestClient: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - async def ExtractContentGraph( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + async def ExtractCategories(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="ExtractContentGraph", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="stream", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ExtractCategories", args={ + "content": content, + }, mode="stream") return result - - async def ExtractContentGraphGeneric( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + async def ExtractContentGraphGeneric(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="ExtractContentGraphGeneric", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="stream", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ExtractContentGraphGeneric", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }, mode="stream") return result - - async def ExtractContentGraphWithAnthropic( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + async def ExtractDynamicContentGraph(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="ExtractContentGraphWithAnthropic", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="stream", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="ExtractDynamicContentGraph", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }, mode="stream") return result - - async def ExtractContentGraphWithEnvPrompt( - self, - content: str, - prompt_override: typing.Optional[str] = None, + async def SummarizeCode(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="ExtractContentGraphWithEnvPrompt", - args={ - "content": content, - "prompt_override": prompt_override, - }, - mode="stream", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SummarizeCode", args={ + "content": content, + }, mode="stream") return result - - async def SummarizeCode( - self, - content: str, + async def SummarizeContent(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="SummarizeCode", - args={ - "content": content, - }, - mode="stream", - ) + result = await self.__options.merge_options(baml_options).create_http_request_async(function_name="SummarizeContent", args={ + "content": content, + }, mode="stream") return result + - async def SummarizeContent( - self, - content: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - result = await self.__options.merge_options(baml_options).create_http_request_async( - function_name="SummarizeContent", - args={ - "content": content, - }, - mode="stream", - ) - return result - - -b = BamlAsyncClient(DoNotUseDirectlyCallManager({})) +b = BamlAsyncClient(DoNotUseDirectlyCallManager({})) \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py index 7ef10535c..769e055bb 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py @@ -19,19 +19,17 @@ from .inlinedbaml import get_baml_files from typing import Dict DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME = BamlRuntime.from_files( - "baml_src", get_baml_files(), os.environ.copy() + "baml_src", + get_baml_files(), + os.environ.copy() ) -DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX = BamlCtxManager( - DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME -) - +DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX = BamlCtxManager(DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME) def reset_baml_env_vars(env_vars: Dict[str, str]): warnings.warn( "reset_baml_env_vars is deprecated and should be removed. Environment variables are now lazily loaded on each function call", DeprecationWarning, - stacklevel=2, + stacklevel=2 ) - __all__ = [] diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py index 66ec84fbb..8eee28982 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py @@ -11,11 +11,11 @@ # baml-cli is available with the baml package. _file_map = { - "extract_categories.baml": "", - "extract_content_graph.baml": 'class Node {\n id string\n name string\n type string\n description string\n @@dynamic\n}\n\n/// doc string for edge\nclass Edge {\n /// doc string for source_node_id\n source_node_id string\n target_node_id string\n relationship_name string\n}\n\nclass KnowledgeGraph {\n nodes (Node @stream.done)[]\n edges Edge[]\n}\n\n// Summarization classes\nclass SummarizedContent {\n summary string\n description string\n}\n\nclass SummarizedFunction {\n name string\n description string\n inputs string[]?\n outputs string[]?\n decorators string[]?\n}\n\nclass SummarizedClass {\n name string\n description string\n methods SummarizedFunction[]?\n decorators string[]?\n}\n\nclass SummarizedCode {\n high_level_summary string\n key_features string[]\n imports string[]\n constants string[]\n classes SummarizedClass[]\n functions SummarizedFunction[]\n workflow_description string?\n}\n\n// Simple template for basic extraction (fast, good quality)\ntemplate_string ExtractContentGraphPrompt() #"\n You are an advanced algorithm that extracts structured data into a knowledge graph.\n\n - **Nodes**: Entities/concepts (like Wikipedia articles).\n - **Edges**: Relationships (like Wikipedia links). Use snake_case (e.g., `acted_in`).\n\n **Rules:**\n\n 1. **Node Labeling & IDs**\n - Use basic types only (e.g., "Person", "Date", "Organization").\n - Avoid overly specific or generic terms (e.g., no "Mathematician" or "Entity").\n - Node IDs must be human-readable names from the text (no numbers).\n\n 2. **Dates & Numbers**\n - Label dates as **"Date"** in "YYYY-MM-DD" format (use available parts if incomplete).\n - Properties are key-value pairs; do not use escaped quotes.\n\n 3. **Coreference Resolution**\n - Use a single, complete identifier for each entity (e.g., always "John Doe" not "Joe" or "he").\n\n 4. **Relationship Labels**:\n - Use descriptive, lowercase, snake_case names for edges.\n - *Example*: born_in, married_to, invented_by.\n - Avoid vague or generic labels like isA, relatesTo, has.\n - Avoid duplicated relationships like produces, produced by.\n\n 5. **Strict Compliance**\n - Follow these rules exactly. Non-compliance results in termination.\n"#\n\n// Summarization prompt template\ntemplate_string SummarizeContentPrompt() #"\n You are a top-tier summarization engine. Your task is to summarize text and make it versatile.\n Be brief and concise, but keep the important information and the subject.\n Use synonym words where possible in order to change the wording but keep the meaning.\n"#\n\n// Code summarization prompt template\ntemplate_string SummarizeCodePrompt() #"\n You are an expert code analyst. Analyze the provided source code and extract key information:\n\n 1. Provide a high-level summary of what the code does\n 2. List key features and functionality\n 3. Identify imports and dependencies\n 4. List constants and global variables\n 5. Summarize classes with their methods\n 6. Summarize standalone functions\n 7. Describe the overall workflow if applicable\n\n Be precise and technical while remaining clear and concise.\n"#\n\n// Detailed template for complex extraction (slower, higher quality)\ntemplate_string DetailedExtractContentGraphPrompt() #"\n You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.\n **Nodes** represent entities and concepts. They\'re akin to Wikipedia nodes.\n **Edges** represent relationships between concepts. They\'re akin to Wikipedia links.\n\n The aim is to achieve simplicity and clarity in the knowledge graph.\n\n # 1. Labeling Nodes\n **Consistency**: Ensure you use basic or elementary types for node labels.\n - For example, when you identify an entity representing a person, always label it as **"Person"**.\n - Avoid using more specific terms like "Mathematician" or "Scientist", keep those as "profession" property.\n - Don\'t use too generic terms like "Entity".\n **Node IDs**: Never utilize integers as node IDs.\n - Node IDs should be names or human-readable identifiers found in the text.\n\n # 2. Handling Numerical Data and Dates\n - For example, when you identify an entity representing a date, make sure it has type **"Date"**.\n - Extract the date in the format "YYYY-MM-DD"\n - If not possible to extract the whole date, extract month or year, or both if available.\n - **Property Format**: Properties must be in a key-value format.\n - **Quotation Marks**: Never use escaped single or double quotes within property values.\n - **Naming Convention**: Use snake_case for relationship names, e.g., `acted_in`.\n\n # 3. Coreference Resolution\n - **Maintain Entity Consistency**: When extracting entities, it\'s vital to ensure consistency.\n If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),\n always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the Person\'s ID.\n Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.\n\n # 4. Strict Compliance\n Adhere to the rules strictly. Non-compliance will result in termination.\n"#\n\n// Guided template with step-by-step instructions\ntemplate_string GuidedExtractContentGraphPrompt() #"\n You are an advanced algorithm designed to extract structured information to build a clean, consistent, and human-readable knowledge graph.\n\n **Objective**:\n - Nodes represent entities and concepts, similar to Wikipedia articles.\n - Edges represent typed relationships between nodes, similar to Wikipedia hyperlinks.\n - The graph must be clear, minimal, consistent, and semantically precise.\n\n **Node Guidelines**:\n\n 1. **Label Consistency**:\n - Use consistent, basic types for all node labels.\n - Do not switch between granular or vague labels for the same kind of entity.\n - Pick one label for each category and apply it uniformly.\n - Each entity type should be in a singular form and in a case of multiple words separated by whitespaces\n\n 2. **Node Identifiers**:\n - Node IDs must be human-readable and derived directly from the text.\n - Prefer full names and canonical terms.\n - Never use integers or autogenerated IDs.\n - *Example*: Use "Marie Curie", "Theory of Evolution", "Google".\n\n 3. **Coreference Resolution**:\n - Maintain one consistent node ID for each real-world entity.\n - Resolve aliases, acronyms, and pronouns to the most complete form.\n - *Example*: Always use "John Doe" even if later referred to as "Doe" or "he".\n\n **Edge Guidelines**:\n\n 4. **Relationship Labels**:\n - Use descriptive, lowercase, snake_case names for edges.\n - *Example*: born_in, married_to, invented_by.\n - Avoid vague or generic labels like isA, relatesTo, has.\n\n 5. **Relationship Direction**:\n - Edges must be directional and logically consistent.\n - *Example*:\n - "Marie Curie" —[born_in]→ "Warsaw"\n - "Radioactivity" —[discovered_by]→ "Marie Curie"\n\n **Compliance**:\n Strict adherence to these guidelines is required. Any deviation will result in immediate termination of the task.\n"#\n\n// Strict template with zero-tolerance rules\ntemplate_string StrictExtractContentGraphPrompt() #"\n You are a top-tier algorithm for **extracting structured information** from unstructured text to build a **knowledge graph**.\n\n Your primary goal is to extract:\n - **Nodes**: Representing **entities** and **concepts** (like Wikipedia nodes).\n - **Edges**: Representing **relationships** between those concepts (like Wikipedia links).\n\n The resulting knowledge graph must be **simple, consistent, and human-readable**.\n\n ## 1. Node Labeling and Identification\n\n ### Node Types\n Use **basic atomic types** for node labels. Always prefer general types over specific roles or professions:\n - "Person" for any human.\n - "Organization" for companies, institutions, etc.\n - "Location" for geographic or place entities.\n - "Date" for any temporal expression.\n - "Event" for historical or scheduled occurrences.\n - "Work" for books, films, artworks, or research papers.\n - "Concept" for abstract notions or ideas.\n\n ### Node IDs\n - Always assign **human-readable and unambiguous identifiers**.\n - Never use numeric or autogenerated IDs.\n - Prioritize **most complete form** of entity names for consistency.\n\n ## 2. Relationship Handling\n - Use **snake_case** for all relationship (edge) types.\n - Keep relationship types semantically clear and consistent.\n - Avoid vague relation names like "related_to" unless no better alternative exists.\n\n ## 3. Strict Compliance\n Follow all rules exactly. Any deviation may lead to rejection or incorrect graph construction.\n"#\n\n// OpenAI client with environment model selection\nclient OpenAIClientWithEnvModel {\n provider openai\n options {\n model env.LLM_MODEL\n api_key env.OPENAI_API_KEY\n }\n}\n\n// Anthropic client with environment model selection\nclient AnthropicClientWithEnvModel {\n provider anthropic\n options {\n model env.LLM_MODEL\n api_key env.ANTHROPIC_API_KEY\n }\n}\n\n// Default client (maintains backward compatibility)\nclient DefaultClient {\n provider openai\n options {\n model "gpt-4o-mini"\n api_key env.OPENAI_API_KEY\n }\n}\n\n// Function that returns raw structured output (for custom objects - to be handled in Python)\nfunction ExtractContentGraphGeneric(\n content: string,\n mode: "simple" | "base" | "guided" | "strict" | "custom"?,\n custom_prompt_content: string?\n) -> KnowledgeGraph {\n client OpenAIClientWithEnvModel\n\n prompt #"\n {% if mode == "base" %}\n {{ DetailedExtractContentGraphPrompt() }}\n {% elif mode == "guided" %}\n {{ GuidedExtractContentGraphPrompt() }}\n {% elif mode == "strict" %}\n {{ StrictExtractContentGraphPrompt() }}\n {% elif mode == "custom" and custom_prompt_content %}\n {{ custom_prompt_content }}\n {% else %}\n {{ ExtractContentGraphPrompt() }}\n {% endif %}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n Before answering, briefly describe what you\'ll extract from the text, then provide the structured output.\n\n Example format:\n I\'ll extract the main entities and their relationships from this text...\n\n { ... }\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\n// Backward-compatible function specifically for KnowledgeGraph\nfunction ExtractContentGraph(\n content: string,\n mode: "simple" | "base" | "guided" | "strict" | "custom"?,\n custom_prompt_content: string?\n) -> KnowledgeGraph {\n client OpenAIClientWithEnvModel\n\n prompt #"\n {% if mode == "base" %}\n {{ DetailedExtractContentGraphPrompt() }}\n {% elif mode == "guided" %}\n {{ GuidedExtractContentGraphPrompt() }}\n {% elif mode == "strict" %}\n {{ StrictExtractContentGraphPrompt() }}\n {% elif mode == "custom" and custom_prompt_content %}\n {{ custom_prompt_content }}\n {% else %}\n {{ ExtractContentGraphPrompt() }}\n {% endif %}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n Before answering, briefly describe what you\'ll extract from the text, then provide the structured output.\n\n Example format:\n I\'ll extract the main entities and their relationships from this text...\n\n { ... }\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\n// Alternative function that uses environment variable for prompt selection\nfunction ExtractContentGraphWithEnvPrompt(\n content: string,\n prompt_override: string?\n) -> KnowledgeGraph {\n client OpenAIClientWithEnvModel\n\n prompt #"\n {% if prompt_override %}\n {{ prompt_override }}\n {% else %}\n {{ ExtractContentGraphPrompt() }}\n {% endif %}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n Before answering, briefly describe what you\'ll extract from the text, then provide the structured output.\n\n Example format:\n I\'ll extract the main entities and their relationships from this text...\n\n { ... }\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\n// Function that uses Anthropic client\nfunction ExtractContentGraphWithAnthropic(\n content: string,\n mode: "simple" | "base" | "guided" | "strict" | "custom"?,\n custom_prompt_content: string?\n) -> KnowledgeGraph {\n client AnthropicClientWithEnvModel\n\n prompt #"\n {% if mode == "base" %}\n {{ DetailedExtractContentGraphPrompt() }}\n {% elif mode == "guided" %}\n {{ GuidedExtractContentGraphPrompt() }}\n {% elif mode == "strict" %}\n {{ StrictExtractContentGraphPrompt() }}\n {% elif mode == "custom" and custom_prompt_content %}\n {{ custom_prompt_content }}\n {% else %}\n {{ ExtractContentGraphPrompt() }}\n {% endif %}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n Before answering, briefly describe what you\'ll extract from the text, then provide the structured output.\n\n Example format:\n I\'ll extract the main entities and their relationships from this text...\n\n { ... }\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\n// Summarization functions\nfunction SummarizeContent(content: string) -> SummarizedContent {\n client OpenAIClientWithEnvModel\n\n prompt #"\n {{ SummarizeContentPrompt() }}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\nfunction SummarizeCode(content: string) -> SummarizedCode {\n client OpenAIClientWithEnvModel\n\n prompt #"\n {{ SummarizeCodePrompt() }}\n\n {{ ctx.output_format(prefix="Answer in this schema:\\n") }}\n\n {{ _.role(\'user\') }}\n {{ content }}\n "#\n}\n\ntest ExtractPersonExample {\n functions [ExtractContentGraph]\n args {\n content #"\n My name is Vasiliy. I was born in 1992. I am a software engineer. I work at Google and am based in Berlin.\n "#\n mode "simple"\n }\n}\n\ntest ExtractGuidedExample {\n functions [ExtractContentGraph]\n args {\n content #"\n Apple Inc. was founded by Steve Jobs in 1976. The company is headquartered in Cupertino, California.\n Tim Cook is the current CEO of Apple Inc.\n "#\n mode "guided"\n }\n}\n\ntest ExtractStrictExample {\n functions [ExtractContentGraph]\n args {\n content #"\n The Python programming language was created by Guido van Rossum in 1991.\n "#\n mode "strict"\n }\n}\n\ntest ExtractGenericExample {\n functions [ExtractContentGraphGeneric]\n args {\n content #"\n React is a JavaScript library for building user interfaces, developed by Facebook.\n "#\n mode "simple"\n }\n}\n\ntest SummarizeContentExample {\n functions [SummarizeContent]\n args {\n content #"\n Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval.\n It deals with the interaction between computers and human language, in particular how to program computers to process and analyze large amounts of natural language data.\n "#\n }\n}\n\ntest SummarizeCodeExample {\n functions [SummarizeCode]\n args {\n content #"\n def fibonacci(n):\n if n <= 1:\n return n\n return fibonacci(n-1) + fibonacci(n-2)\n \n def main():\n print(fibonacci(10))\n \n if __name__ == "__main__":\n main()\n "#\n }\n}\n', - "generators.baml": '// This helps use auto generate libraries you can use in the language of\n// your choice. You can have multiple generators if you use multiple languages.\n// Just ensure that the output_dir is different for each generator.\ngenerator target {\n // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"\n output_type "python/pydantic"\n\n // Where the generated code will be saved (relative to baml_src/)\n output_dir "../baml/"\n\n // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).\n // The BAML VSCode extension version should also match this version.\n version "0.201.0"\n\n // Valid values: "sync", "async"\n // This controls what `b.FunctionName()` will be (sync or async).\n default_client_mode sync\n}\n', + + "extract_categories.baml": "// Content classification data models - matching shared/data_models.py\nclass TextContent {\n type string\n subclass string[]\n}\n\nclass AudioContent {\n type string\n subclass string[]\n}\n\nclass ImageContent {\n type string\n subclass string[]\n}\n\nclass VideoContent {\n type string\n subclass string[]\n}\n\nclass MultimediaContent {\n type string\n subclass string[]\n}\n\nclass Model3DContent {\n type string\n subclass string[]\n}\n\nclass ProceduralContent {\n type string\n subclass string[]\n}\n\nclass ContentLabel {\n content_type \"text\" | \"audio\" | \"image\" | \"video\" | \"multimedia\" | \"3d_model\" | \"procedural\"\n type string\n subclass string[]\n}\n\nclass DefaultContentPrediction {\n label ContentLabel\n}\n\n// Content classification prompt template\ntemplate_string ClassifyContentPrompt() #\"\n You are a classification engine and should classify content. Make sure to use one of the existing classification options and not invent your own.\n\n Classify the content into one of these main categories and their relevant subclasses:\n\n **TEXT CONTENT** (content_type: \"text\"):\n - type: \"TEXTUAL_DOCUMENTS_USED_FOR_GENERAL_PURPOSES\"\n - subclass options: [\"Articles, essays, and reports\", \"Books and manuscripts\", \"News stories and blog posts\", \"Research papers and academic publications\", \"Social media posts and comments\", \"Website content and product descriptions\", \"Personal narratives and stories\", \"Spreadsheets and tables\", \"Forms and surveys\", \"Databases and CSV files\", \"Source code in various programming languages\", \"Shell commands and scripts\", \"Markup languages (HTML, XML)\", \"Stylesheets (CSS) and configuration files (YAML, JSON, INI)\", \"Chat transcripts and messaging history\", \"Customer service logs and interactions\", \"Conversational AI training data\", \"Textbook content and lecture notes\", \"Exam questions and academic exercises\", \"E-learning course materials\", \"Poetry and prose\", \"Scripts for plays, movies, and television\", \"Song lyrics\", \"Manuals and user guides\", \"Technical specifications and API documentation\", \"Helpdesk articles and FAQs\", \"Contracts and agreements\", \"Laws, regulations, and legal case documents\", \"Policy documents and compliance materials\", \"Clinical trial reports\", \"Patient records and case notes\", \"Scientific journal articles\", \"Financial reports and statements\", \"Business plans and proposals\", \"Market research and analysis reports\", \"Ad copies and marketing slogans\", \"Product catalogs and brochures\", \"Press releases and promotional content\", \"Professional and formal correspondence\", \"Personal emails and letters\", \"Image and video captions\", \"Annotations and metadata for various media\", \"Vocabulary lists and grammar rules\", \"Language exercises and quizzes\", \"Other types of text data\"]\n\n **AUDIO CONTENT** (content_type: \"audio\"):\n - type: \"AUDIO_DOCUMENTS_USED_FOR_GENERAL_PURPOSES\"\n - subclass options: [\"Music tracks and albums\", \"Podcasts and radio broadcasts\", \"Audiobooks and audio guides\", \"Recorded interviews and speeches\", \"Sound effects and ambient sounds\", \"Other types of audio recordings\"]\n\n **IMAGE CONTENT** (content_type: \"image\"):\n - type: \"IMAGE_DOCUMENTS_USED_FOR_GENERAL_PURPOSES\"\n - subclass options: [\"Photographs and digital images\", \"Illustrations, diagrams, and charts\", \"Infographics and visual data representations\", \"Artwork and paintings\", \"Screenshots and graphical user interfaces\", \"Other types of images\"]\n\n **VIDEO CONTENT** (content_type: \"video\"):\n - type: \"VIDEO_DOCUMENTS_USED_FOR_GENERAL_PURPOSES\"\n - subclass options: [\"Movies and short films\", \"Documentaries and educational videos\", \"Video tutorials and how-to guides\", \"Animated features and cartoons\", \"Live event recordings and sports broadcasts\", \"Other types of video content\"]\n\n **MULTIMEDIA CONTENT** (content_type: \"multimedia\"):\n - type: \"MULTIMEDIA_DOCUMENTS_USED_FOR_GENERAL_PURPOSES\"\n - subclass options: [\"Interactive web content and games\", \"Virtual reality (VR) and augmented reality (AR) experiences\", \"Mixed media presentations and slide decks\", \"E-learning modules with integrated multimedia\", \"Digital exhibitions and virtual tours\", \"Other types of multimedia content\"]\n\n **3D MODEL CONTENT** (content_type: \"3d_model\"):\n - type: \"3D_MODEL_DOCUMENTS_USED_FOR_GENERAL_PURPOSES\"\n - subclass options: [\"Architectural renderings and building plans\", \"Product design models and prototypes\", \"3D animations and character models\", \"Scientific simulations and visualizations\", \"Virtual objects for AR/VR applications\", \"Other types of 3D models\"]\n\n **PROCEDURAL CONTENT** (content_type: \"procedural\"):\n - type: \"PROCEDURAL_DOCUMENTS_USED_FOR_GENERAL_PURPOSES\"\n - subclass options: [\"Tutorials and step-by-step guides\", \"Workflow and process descriptions\", \"Simulation and training exercises\", \"Recipes and crafting instructions\", \"Other types of procedural content\"]\n\n Select the most appropriate content_type, type, and relevant subclasses.\n\"#\n\n// OpenAI client defined once for all BAML files\n\n// Classification function\nfunction ExtractCategories(content: string) -> DefaultContentPrediction {\n client OpenAI\n\n prompt #\"\n {{ ClassifyContentPrompt() }}\n\n {{ ctx.output_format(prefix=\"Answer in this schema:\\n\") }}\n\n {{ _.role('user') }}\n {{ content }}\n \"#\n}\n\n// Test case for classification\ntest ExtractCategoriesExample {\n functions [ExtractCategories]\n args {\n content #\"\n Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval.\n It deals with the interaction between computers and human language, in particular how to program computers to process and analyze large amounts of natural language data.\n \"#\n }\n}\n", + "extract_content_graph.baml": "class Node {\n id string\n name string\n type string\n description string\n @@dynamic\n}\n\n/// doc string for edge\nclass Edge {\n /// doc string for source_node_id\n source_node_id string\n target_node_id string\n relationship_name string\n}\n\nclass KnowledgeGraph {\n nodes (Node @stream.done)[]\n edges Edge[]\n}\n\n// Summarization classes\nclass SummarizedContent {\n summary string\n description string\n}\n\nclass SummarizedFunction {\n name string\n description string\n inputs string[]?\n outputs string[]?\n decorators string[]?\n}\n\nclass SummarizedClass {\n name string\n description string\n methods SummarizedFunction[]?\n decorators string[]?\n}\n\nclass SummarizedCode {\n high_level_summary string\n key_features string[]\n imports string[]\n constants string[]\n classes SummarizedClass[]\n functions SummarizedFunction[]\n workflow_description string?\n}\n\nclass DynamicKnowledgeGraph {\n @@dynamic\n}\n\n\n// Simple template for basic extraction (fast, good quality)\ntemplate_string ExtractContentGraphPrompt() #\"\n You are an advanced algorithm that extracts structured data into a knowledge graph.\n\n - **Nodes**: Entities/concepts (like Wikipedia articles).\n - **Edges**: Relationships (like Wikipedia links). Use snake_case (e.g., `acted_in`).\n\n **Rules:**\n\n 1. **Node Labeling & IDs**\n - Use basic types only (e.g., \"Person\", \"Date\", \"Organization\").\n - Avoid overly specific or generic terms (e.g., no \"Mathematician\" or \"Entity\").\n - Node IDs must be human-readable names from the text (no numbers).\n\n 2. **Dates & Numbers**\n - Label dates as **\"Date\"** in \"YYYY-MM-DD\" format (use available parts if incomplete).\n - Properties are key-value pairs; do not use escaped quotes.\n\n 3. **Coreference Resolution**\n - Use a single, complete identifier for each entity (e.g., always \"John Doe\" not \"Joe\" or \"he\").\n\n 4. **Relationship Labels**:\n - Use descriptive, lowercase, snake_case names for edges.\n - *Example*: born_in, married_to, invented_by.\n - Avoid vague or generic labels like isA, relatesTo, has.\n - Avoid duplicated relationships like produces, produced by.\n\n 5. **Strict Compliance**\n - Follow these rules exactly. Non-compliance results in termination.\n\"#\n\n// Summarization prompt template\ntemplate_string SummarizeContentPrompt() #\"\n You are a top-tier summarization engine. Your task is to summarize text and make it versatile.\n Be brief and concise, but keep the important information and the subject.\n Use synonym words where possible in order to change the wording but keep the meaning.\n\"#\n\n// Code summarization prompt template\ntemplate_string SummarizeCodePrompt() #\"\n You are an expert code analyst. Analyze the provided source code and extract key information:\n\n 1. Provide a high-level summary of what the code does\n 2. List key features and functionality\n 3. Identify imports and dependencies\n 4. List constants and global variables\n 5. Summarize classes with their methods\n 6. Summarize standalone functions\n 7. Describe the overall workflow if applicable\n\n Be precise and technical while remaining clear and concise.\n\"#\n\n// Detailed template for complex extraction (slower, higher quality)\ntemplate_string DetailedExtractContentGraphPrompt() #\"\n You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.\n **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.\n **Edges** represent relationships between concepts. They're akin to Wikipedia links.\n\n The aim is to achieve simplicity and clarity in the knowledge graph.\n\n # 1. Labeling Nodes\n **Consistency**: Ensure you use basic or elementary types for node labels.\n - For example, when you identify an entity representing a person, always label it as **\"Person\"**.\n - Avoid using more specific terms like \"Mathematician\" or \"Scientist\", keep those as \"profession\" property.\n - Don't use too generic terms like \"Entity\".\n **Node IDs**: Never utilize integers as node IDs.\n - Node IDs should be names or human-readable identifiers found in the text.\n\n # 2. Handling Numerical Data and Dates\n - For example, when you identify an entity representing a date, make sure it has type **\"Date\"**.\n - Extract the date in the format \"YYYY-MM-DD\"\n - If not possible to extract the whole date, extract month or year, or both if available.\n - **Property Format**: Properties must be in a key-value format.\n - **Quotation Marks**: Never use escaped single or double quotes within property values.\n - **Naming Convention**: Use snake_case for relationship names, e.g., `acted_in`.\n\n # 3. Coreference Resolution\n - **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.\n If an entity, such as \"John Doe\", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., \"Joe\", \"he\"),\n always use the most complete identifier for that entity throughout the knowledge graph. In this example, use \"John Doe\" as the Person's ID.\n Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.\n\n # 4. Strict Compliance\n Adhere to the rules strictly. Non-compliance will result in termination.\n\"#\n\n// Guided template with step-by-step instructions\ntemplate_string GuidedExtractContentGraphPrompt() #\"\n You are an advanced algorithm designed to extract structured information to build a clean, consistent, and human-readable knowledge graph.\n\n **Objective**:\n - Nodes represent entities and concepts, similar to Wikipedia articles.\n - Edges represent typed relationships between nodes, similar to Wikipedia hyperlinks.\n - The graph must be clear, minimal, consistent, and semantically precise.\n\n **Node Guidelines**:\n\n 1. **Label Consistency**:\n - Use consistent, basic types for all node labels.\n - Do not switch between granular or vague labels for the same kind of entity.\n - Pick one label for each category and apply it uniformly.\n - Each entity type should be in a singular form and in a case of multiple words separated by whitespaces\n\n 2. **Node Identifiers**:\n - Node IDs must be human-readable and derived directly from the text.\n - Prefer full names and canonical terms.\n - Never use integers or autogenerated IDs.\n - *Example*: Use \"Marie Curie\", \"Theory of Evolution\", \"Google\".\n\n 3. **Coreference Resolution**:\n - Maintain one consistent node ID for each real-world entity.\n - Resolve aliases, acronyms, and pronouns to the most complete form.\n - *Example*: Always use \"John Doe\" even if later referred to as \"Doe\" or \"he\".\n\n **Edge Guidelines**:\n\n 4. **Relationship Labels**:\n - Use descriptive, lowercase, snake_case names for edges.\n - *Example*: born_in, married_to, invented_by.\n - Avoid vague or generic labels like isA, relatesTo, has.\n\n 5. **Relationship Direction**:\n - Edges must be directional and logically consistent.\n - *Example*:\n - \"Marie Curie\" —[born_in]→ \"Warsaw\"\n - \"Radioactivity\" —[discovered_by]→ \"Marie Curie\"\n\n **Compliance**:\n Strict adherence to these guidelines is required. Any deviation will result in immediate termination of the task.\n\"#\n\n// Strict template with zero-tolerance rules\ntemplate_string StrictExtractContentGraphPrompt() #\"\n You are a top-tier algorithm for **extracting structured information** from unstructured text to build a **knowledge graph**.\n\n Your primary goal is to extract:\n - **Nodes**: Representing **entities** and **concepts** (like Wikipedia nodes).\n - **Edges**: Representing **relationships** between those concepts (like Wikipedia links).\n\n The resulting knowledge graph must be **simple, consistent, and human-readable**.\n\n ## 1. Node Labeling and Identification\n\n ### Node Types\n Use **basic atomic types** for node labels. Always prefer general types over specific roles or professions:\n - \"Person\" for any human.\n - \"Organization\" for companies, institutions, etc.\n - \"Location\" for geographic or place entities.\n - \"Date\" for any temporal expression.\n - \"Event\" for historical or scheduled occurrences.\n - \"Work\" for books, films, artworks, or research papers.\n - \"Concept\" for abstract notions or ideas.\n\n ### Node IDs\n - Always assign **human-readable and unambiguous identifiers**.\n - Never use numeric or autogenerated IDs.\n - Prioritize **most complete form** of entity names for consistency.\n\n ## 2. Relationship Handling\n - Use **snake_case** for all relationship (edge) types.\n - Keep relationship types semantically clear and consistent.\n - Avoid vague relation names like \"related_to\" unless no better alternative exists.\n\n ## 3. Strict Compliance\n Follow all rules exactly. Any deviation may lead to rejection or incorrect graph construction.\n\"#\n\n// OpenAI client with environment model selection\nclient OpenAI {\n provider openai\n options {\n model client_registry.model\n api_key client_registry.api_key\n }\n}\n\n\n\n// Function that returns raw structured output (for custom objects - to be handled in Python)\nfunction ExtractContentGraphGeneric(\n content: string,\n mode: \"simple\" | \"base\" | \"guided\" | \"strict\" | \"custom\"?,\n custom_prompt_content: string?\n) -> KnowledgeGraph {\n client OpenAI\n\n prompt #\"\n {% if mode == \"base\" %}\n {{ DetailedExtractContentGraphPrompt() }}\n {% elif mode == \"guided\" %}\n {{ GuidedExtractContentGraphPrompt() }}\n {% elif mode == \"strict\" %}\n {{ StrictExtractContentGraphPrompt() }}\n {% elif mode == \"custom\" and custom_prompt_content %}\n {{ custom_prompt_content }}\n {% else %}\n {{ ExtractContentGraphPrompt() }}\n {% endif %}\n\n {{ ctx.output_format(prefix=\"Answer in this schema:\\n\") }}\n\n Before answering, briefly describe what you'll extract from the text, then provide the structured output.\n\n Example format:\n I'll extract the main entities and their relationships from this text...\n\n { ... }\n\n {{ _.role('user') }}\n {{ content }}\n \"#\n}\n\n// Backward-compatible function specifically for KnowledgeGraph\nfunction ExtractDynamicContentGraph(\n content: string,\n mode: \"simple\" | \"base\" | \"guided\" | \"strict\" | \"custom\"?,\n custom_prompt_content: string?\n) -> DynamicKnowledgeGraph {\n client OpenAI\n\n prompt #\"\n {% if mode == \"base\" %}\n {{ DetailedExtractContentGraphPrompt() }}\n {% elif mode == \"guided\" %}\n {{ GuidedExtractContentGraphPrompt() }}\n {% elif mode == \"strict\" %}\n {{ StrictExtractContentGraphPrompt() }}\n {% elif mode == \"custom\" and custom_prompt_content %}\n {{ custom_prompt_content }}\n {% else %}\n {{ ExtractContentGraphPrompt() }}\n {% endif %}\n\n {{ ctx.output_format(prefix=\"Answer in this schema:\\n\") }}\n\n Before answering, briefly describe what you'll extract from the text, then provide the structured output.\n\n Example format:\n I'll extract the main entities and their relationships from this text...\n\n { ... }\n\n {{ _.role('user') }}\n {{ content }}\n \"#\n}\n\n\n// Summarization functions\nfunction SummarizeContent(content: string) -> SummarizedContent {\n client OpenAI\n\n prompt #\"\n {{ SummarizeContentPrompt() }}\n\n {{ ctx.output_format(prefix=\"Answer in this schema:\\n\") }}\n\n {{ _.role('user') }}\n {{ content }}\n \"#\n}\n\nfunction SummarizeCode(content: string) -> SummarizedCode {\n client OpenAI\n\n prompt #\"\n {{ SummarizeCodePrompt() }}\n\n {{ ctx.output_format(prefix=\"Answer in this schema:\\n\") }}\n\n {{ _.role('user') }}\n {{ content }}\n \"#\n}\n\ntest ExtractStrictExample {\n functions [ExtractContentGraphGeneric]\n args {\n content #\"\n The Python programming language was created by Guido van Rossum in 1991.\n \"#\n mode \"strict\"\n }\n}", + "generators.baml": "// This helps use auto generate libraries you can use in the language of\n// your choice. You can have multiple generators if you use multiple languages.\n// Just ensure that the output_dir is different for each generator.\ngenerator target {\n // Valid values: \"python/pydantic\", \"typescript\", \"ruby/sorbet\", \"rest/openapi\"\n output_type \"python/pydantic\"\n\n // Where the generated code will be saved (relative to baml_src/)\n output_dir \"../baml/\"\n\n // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).\n // The BAML VSCode extension version should also match this version.\n version \"0.201.0\"\n\n // Valid values: \"sync\", \"async\"\n // This controls what `b.FunctionName()` will be (sync or async).\n default_client_mode sync\n}\n", } - def get_baml_files(): - return _file_map + return _file_map \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py index f923f8255..845ec4b68 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py @@ -16,77 +16,43 @@ import typing_extensions from . import stream_types, types from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions - class LlmResponseParser: __options: DoNotUseDirectlyCallManager def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def ExtractContentGraph( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, - ) -> types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).parse_response( - function_name="ExtractContentGraph", llm_response=llm_response, mode="request" - ) - return typing.cast(types.KnowledgeGraph, result) + def ExtractCategories( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> types.DefaultContentPrediction: + result = self.__options.merge_options(baml_options).parse_response(function_name="ExtractCategories", llm_response=llm_response, mode="request") + return typing.cast(types.DefaultContentPrediction, result) def ExtractContentGraphGeneric( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, + self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).parse_response( - function_name="ExtractContentGraphGeneric", llm_response=llm_response, mode="request" - ) + result = self.__options.merge_options(baml_options).parse_response(function_name="ExtractContentGraphGeneric", llm_response=llm_response, mode="request") return typing.cast(types.KnowledgeGraph, result) - def ExtractContentGraphWithAnthropic( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, - ) -> types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).parse_response( - function_name="ExtractContentGraphWithAnthropic", - llm_response=llm_response, - mode="request", - ) - return typing.cast(types.KnowledgeGraph, result) - - def ExtractContentGraphWithEnvPrompt( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, - ) -> types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).parse_response( - function_name="ExtractContentGraphWithEnvPrompt", - llm_response=llm_response, - mode="request", - ) - return typing.cast(types.KnowledgeGraph, result) + def ExtractDynamicContentGraph( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> types.DynamicKnowledgeGraph: + result = self.__options.merge_options(baml_options).parse_response(function_name="ExtractDynamicContentGraph", llm_response=llm_response, mode="request") + return typing.cast(types.DynamicKnowledgeGraph, result) def SummarizeCode( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, + self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> types.SummarizedCode: - result = self.__options.merge_options(baml_options).parse_response( - function_name="SummarizeCode", llm_response=llm_response, mode="request" - ) + result = self.__options.merge_options(baml_options).parse_response(function_name="SummarizeCode", llm_response=llm_response, mode="request") return typing.cast(types.SummarizedCode, result) def SummarizeContent( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, + self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> types.SummarizedContent: - result = self.__options.merge_options(baml_options).parse_response( - function_name="SummarizeContent", llm_response=llm_response, mode="request" - ) + result = self.__options.merge_options(baml_options).parse_response(function_name="SummarizeContent", llm_response=llm_response, mode="request") return typing.cast(types.SummarizedContent, result) + class LlmStreamParser: __options: DoNotUseDirectlyCallManager @@ -94,66 +60,34 @@ class LlmStreamParser: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def ExtractContentGraph( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, - ) -> stream_types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).parse_response( - function_name="ExtractContentGraph", llm_response=llm_response, mode="stream" - ) - return typing.cast(stream_types.KnowledgeGraph, result) + def ExtractCategories( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> stream_types.DefaultContentPrediction: + result = self.__options.merge_options(baml_options).parse_response(function_name="ExtractCategories", llm_response=llm_response, mode="stream") + return typing.cast(stream_types.DefaultContentPrediction, result) def ExtractContentGraphGeneric( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, + self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> stream_types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).parse_response( - function_name="ExtractContentGraphGeneric", llm_response=llm_response, mode="stream" - ) + result = self.__options.merge_options(baml_options).parse_response(function_name="ExtractContentGraphGeneric", llm_response=llm_response, mode="stream") return typing.cast(stream_types.KnowledgeGraph, result) - def ExtractContentGraphWithAnthropic( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, - ) -> stream_types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).parse_response( - function_name="ExtractContentGraphWithAnthropic", - llm_response=llm_response, - mode="stream", - ) - return typing.cast(stream_types.KnowledgeGraph, result) - - def ExtractContentGraphWithEnvPrompt( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, - ) -> stream_types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).parse_response( - function_name="ExtractContentGraphWithEnvPrompt", - llm_response=llm_response, - mode="stream", - ) - return typing.cast(stream_types.KnowledgeGraph, result) + def ExtractDynamicContentGraph( + self, llm_response: str, baml_options: BamlCallOptions = {}, + ) -> stream_types.DynamicKnowledgeGraph: + result = self.__options.merge_options(baml_options).parse_response(function_name="ExtractDynamicContentGraph", llm_response=llm_response, mode="stream") + return typing.cast(stream_types.DynamicKnowledgeGraph, result) def SummarizeCode( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, + self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> stream_types.SummarizedCode: - result = self.__options.merge_options(baml_options).parse_response( - function_name="SummarizeCode", llm_response=llm_response, mode="stream" - ) + result = self.__options.merge_options(baml_options).parse_response(function_name="SummarizeCode", llm_response=llm_response, mode="stream") return typing.cast(stream_types.SummarizedCode, result) def SummarizeContent( - self, - llm_response: str, - baml_options: BamlCallOptions = {}, + self, llm_response: str, baml_options: BamlCallOptions = {}, ) -> stream_types.SummarizedContent: - result = self.__options.merge_options(baml_options).parse_response( - function_name="SummarizeContent", llm_response=llm_response, mode="stream" - ) + result = self.__options.merge_options(baml_options).parse_response(function_name="SummarizeContent", llm_response=llm_response, mode="stream") return typing.cast(stream_types.SummarizedContent, result) + + \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py index 1955e5c14..c94bb950e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py @@ -17,10 +17,7 @@ import typing_extensions import baml_py from . import types, stream_types, type_builder -from .globals import ( - DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__, - DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX as __ctx__manager__, -) +from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__, DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX as __ctx__manager__ class BamlCallOptions(typing.TypedDict, total=False): @@ -51,6 +48,9 @@ class _ResolvedBamlOptions: self.env_vars = env_vars + + + class DoNotUseDirectlyCallManager: def __init__(self, baml_options: BamlCallOptions): self.__baml_options = baml_options @@ -74,9 +74,7 @@ class DoNotUseDirectlyCallManager: collectors_as_list = ( collector if isinstance(collector, list) - else [collector] - if collector is not None - else [] + else [collector] if collector is not None else [] ) env_vars = os.environ.copy() for k, v in self.__baml_options.get("env", {}).items(): @@ -166,9 +164,7 @@ class DoNotUseDirectlyCallManager: *, function_name: str, args: typing.Dict[str, typing.Any], - ) -> typing.Tuple[ - baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.SyncFunctionResultStream - ]: + ) -> typing.Tuple[baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.SyncFunctionResultStream]: resolved_options = self.__resolve() ctx = __ctx__manager__.get() result = __runtime__.stream_function_sync( @@ -176,7 +172,7 @@ class DoNotUseDirectlyCallManager: args, # this is always None, we set this later! # on_event - None, + None, # ctx ctx, # tb @@ -236,13 +232,7 @@ class DoNotUseDirectlyCallManager: mode == "stream", ) - def parse_response( - self, - *, - function_name: str, - llm_response: str, - mode: typing_extensions.Literal["stream", "request"], - ) -> typing.Any: + def parse_response(self, *, function_name: str, llm_response: str, mode: typing_extensions.Literal["stream", "request"]) -> typing.Any: resolved_options = self.__resolve() return __runtime__.parse_llm_response( function_name, @@ -263,4 +253,4 @@ class DoNotUseDirectlyCallManager: resolved_options.client_registry, # env_vars resolved_options.env_vars, - ) + ) \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py index 0cd28bf45..86e76a3bf 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py @@ -18,18 +18,28 @@ import baml_py from . import types -StreamStateValueT = typing.TypeVar("StreamStateValueT") - - +StreamStateValueT = typing.TypeVar('StreamStateValueT') class StreamState(BaseModel, typing.Generic[StreamStateValueT]): value: StreamStateValueT state: typing_extensions.Literal["Pending", "Incomplete", "Complete"] - - # ######################################################################### -# Generated classes (7) +# Generated classes (17) # ######################################################################### +class AudioContent(BaseModel): + type: typing.Optional[str] = None + subclass: typing.List[str] + +class ContentLabel(BaseModel): + content_type: typing.Optional[typing.Union[str, str, str, str, str, str, str]] = None + type: typing.Optional[str] = None + subclass: typing.List[str] + +class DefaultContentPrediction(BaseModel): + label: typing.Optional["ContentLabel"] = None + +class DynamicKnowledgeGraph(BaseModel): + model_config = ConfigDict(extra='allow') class Edge(BaseModel): # doc string for edge @@ -39,19 +49,32 @@ class Edge(BaseModel): target_node_id: typing.Optional[str] = None relationship_name: typing.Optional[str] = None +class ImageContent(BaseModel): + type: typing.Optional[str] = None + subclass: typing.List[str] class KnowledgeGraph(BaseModel): nodes: typing.List["types.Node"] edges: typing.List["Edge"] +class Model3DContent(BaseModel): + type: typing.Optional[str] = None + subclass: typing.List[str] + +class MultimediaContent(BaseModel): + type: typing.Optional[str] = None + subclass: typing.List[str] class Node(BaseModel): - model_config = ConfigDict(extra="allow") + model_config = ConfigDict(extra='allow') id: typing.Optional[str] = None name: typing.Optional[str] = None type: typing.Optional[str] = None description: typing.Optional[str] = None +class ProceduralContent(BaseModel): + type: typing.Optional[str] = None + subclass: typing.List[str] class SummarizedClass(BaseModel): name: typing.Optional[str] = None @@ -59,7 +82,6 @@ class SummarizedClass(BaseModel): methods: typing.Optional[typing.List["SummarizedFunction"]] = None decorators: typing.Optional[typing.List[str]] = None - class SummarizedCode(BaseModel): high_level_summary: typing.Optional[str] = None key_features: typing.List[str] @@ -69,12 +91,10 @@ class SummarizedCode(BaseModel): functions: typing.List["SummarizedFunction"] workflow_description: typing.Optional[str] = None - class SummarizedContent(BaseModel): summary: typing.Optional[str] = None description: typing.Optional[str] = None - class SummarizedFunction(BaseModel): name: typing.Optional[str] = None description: typing.Optional[str] = None @@ -82,6 +102,13 @@ class SummarizedFunction(BaseModel): outputs: typing.Optional[typing.List[str]] = None decorators: typing.Optional[typing.List[str]] = None +class TextContent(BaseModel): + type: typing.Optional[str] = None + subclass: typing.List[str] + +class VideoContent(BaseModel): + type: typing.Optional[str] = None + subclass: typing.List[str] # ######################################################################### # Generated type aliases (0) diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py index f0dbe2144..ca4f97a2b 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py @@ -19,7 +19,6 @@ from .parser import LlmResponseParser, LlmStreamParser from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__ - class BamlSyncClient: __options: DoNotUseDirectlyCallManager __stream_client: "BamlStreamClient" @@ -49,13 +48,10 @@ class BamlSyncClient: self.__llm_response_parser = LlmResponseParser(self.__options) self.__llm_stream_parser = LlmStreamParser(self.__options) - def with_options( - self, + def with_options(self, tb: typing.Optional[type_builder.TypeBuilder] = None, client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None, - collector: typing.Optional[ - typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]] - ] = None, + collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None, env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None, ) -> "BamlSyncClient": options: BamlCallOptions = {} @@ -71,151 +67,60 @@ class BamlSyncClient: @property def stream(self): - return self.__stream_client + return self.__stream_client @property def request(self): - return self.__http_request + return self.__http_request @property def stream_request(self): - return self.__http_stream_request + return self.__http_stream_request @property def parse(self): - return self.__llm_response_parser + return self.__llm_response_parser @property def parse_stream(self): - return self.__llm_stream_parser - - def ExtractContentGraph( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + return self.__llm_stream_parser + + def ExtractCategories(self, content: str, + baml_options: BamlCallOptions = {}, + ) -> types.DefaultContentPrediction: + result = self.__options.merge_options(baml_options).call_function_sync(function_name="ExtractCategories", args={ + "content": content, + }) + return typing.cast(types.DefaultContentPrediction, result.cast_to(types, types, stream_types, False, __runtime__)) + def ExtractContentGraphGeneric(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).call_function_sync( - function_name="ExtractContentGraph", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) - return typing.cast( - types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - def ExtractContentGraphGeneric( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + result = self.__options.merge_options(baml_options).call_function_sync(function_name="ExtractContentGraphGeneric", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }) + return typing.cast(types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__)) + def ExtractDynamicContentGraph(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, - ) -> types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).call_function_sync( - function_name="ExtractContentGraphGeneric", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) - return typing.cast( - types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - def ExtractContentGraphWithAnthropic( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, - baml_options: BamlCallOptions = {}, - ) -> types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).call_function_sync( - function_name="ExtractContentGraphWithAnthropic", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) - return typing.cast( - types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - def ExtractContentGraphWithEnvPrompt( - self, - content: str, - prompt_override: typing.Optional[str] = None, - baml_options: BamlCallOptions = {}, - ) -> types.KnowledgeGraph: - result = self.__options.merge_options(baml_options).call_function_sync( - function_name="ExtractContentGraphWithEnvPrompt", - args={ - "content": content, - "prompt_override": prompt_override, - }, - ) - return typing.cast( - types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - def SummarizeCode( - self, - content: str, + ) -> types.DynamicKnowledgeGraph: + result = self.__options.merge_options(baml_options).call_function_sync(function_name="ExtractDynamicContentGraph", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }) + return typing.cast(types.DynamicKnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__)) + def SummarizeCode(self, content: str, baml_options: BamlCallOptions = {}, ) -> types.SummarizedCode: - result = self.__options.merge_options(baml_options).call_function_sync( - function_name="SummarizeCode", - args={ - "content": content, - }, - ) - return typing.cast( - types.SummarizedCode, result.cast_to(types, types, stream_types, False, __runtime__) - ) - - def SummarizeContent( - self, - content: str, + result = self.__options.merge_options(baml_options).call_function_sync(function_name="SummarizeCode", args={ + "content": content, + }) + return typing.cast(types.SummarizedCode, result.cast_to(types, types, stream_types, False, __runtime__)) + def SummarizeContent(self, content: str, baml_options: BamlCallOptions = {}, ) -> types.SummarizedContent: - result = self.__options.merge_options(baml_options).call_function_sync( - function_name="SummarizeContent", - args={ - "content": content, - }, - ) - return typing.cast( - types.SummarizedContent, result.cast_to(types, types, stream_types, False, __runtime__) - ) + result = self.__options.merge_options(baml_options).call_function_sync(function_name="SummarizeContent", args={ + "content": content, + }) + return typing.cast(types.SummarizedContent, result.cast_to(types, types, stream_types, False, __runtime__)) + class BamlStreamClient: @@ -224,182 +129,67 @@ class BamlStreamClient: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def ExtractContentGraph( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractCategories(self, content: str, + baml_options: BamlCallOptions = {}, + ) -> baml_py.BamlSyncStream[stream_types.DefaultContentPrediction, types.DefaultContentPrediction]: + ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="ExtractCategories", args={ + "content": content, + }) + return baml_py.BamlSyncStream[stream_types.DefaultContentPrediction, types.DefaultContentPrediction]( + result, + lambda x: typing.cast(stream_types.DefaultContentPrediction, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.DefaultContentPrediction, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, + ) + def ExtractContentGraphGeneric(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]: - ctx, result = self.__options.merge_options(baml_options).create_sync_stream( - function_name="ExtractContentGraph", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) + ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="ExtractContentGraphGeneric", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }) return baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]( - result, - lambda x: typing.cast( - stream_types.KnowledgeGraph, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, + result, + lambda x: typing.cast(stream_types.KnowledgeGraph, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, ) - - def ExtractContentGraphGeneric( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractDynamicContentGraph(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]: - ctx, result = self.__options.merge_options(baml_options).create_sync_stream( - function_name="ExtractContentGraphGeneric", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, + ) -> baml_py.BamlSyncStream[stream_types.DynamicKnowledgeGraph, types.DynamicKnowledgeGraph]: + ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="ExtractDynamicContentGraph", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }) + return baml_py.BamlSyncStream[stream_types.DynamicKnowledgeGraph, types.DynamicKnowledgeGraph]( + result, + lambda x: typing.cast(stream_types.DynamicKnowledgeGraph, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.DynamicKnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, ) - return baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]( - result, - lambda x: typing.cast( - stream_types.KnowledgeGraph, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, - ) - - def ExtractContentGraphWithAnthropic( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, - baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]: - ctx, result = self.__options.merge_options(baml_options).create_sync_stream( - function_name="ExtractContentGraphWithAnthropic", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - ) - return baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]( - result, - lambda x: typing.cast( - stream_types.KnowledgeGraph, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, - ) - - def ExtractContentGraphWithEnvPrompt( - self, - content: str, - prompt_override: typing.Optional[str] = None, - baml_options: BamlCallOptions = {}, - ) -> baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]: - ctx, result = self.__options.merge_options(baml_options).create_sync_stream( - function_name="ExtractContentGraphWithEnvPrompt", - args={ - "content": content, - "prompt_override": prompt_override, - }, - ) - return baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]( - result, - lambda x: typing.cast( - stream_types.KnowledgeGraph, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, - ) - - def SummarizeCode( - self, - content: str, + def SummarizeCode(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlSyncStream[stream_types.SummarizedCode, types.SummarizedCode]: - ctx, result = self.__options.merge_options(baml_options).create_sync_stream( - function_name="SummarizeCode", - args={ - "content": content, - }, - ) + ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="SummarizeCode", args={ + "content": content, + }) return baml_py.BamlSyncStream[stream_types.SummarizedCode, types.SummarizedCode]( - result, - lambda x: typing.cast( - stream_types.SummarizedCode, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.SummarizedCode, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, + result, + lambda x: typing.cast(stream_types.SummarizedCode, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.SummarizedCode, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, ) - - def SummarizeContent( - self, - content: str, + def SummarizeContent(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.BamlSyncStream[stream_types.SummarizedContent, types.SummarizedContent]: - ctx, result = self.__options.merge_options(baml_options).create_sync_stream( - function_name="SummarizeContent", - args={ - "content": content, - }, - ) + ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="SummarizeContent", args={ + "content": content, + }) return baml_py.BamlSyncStream[stream_types.SummarizedContent, types.SummarizedContent]( - result, - lambda x: typing.cast( - stream_types.SummarizedContent, - x.cast_to(types, types, stream_types, True, __runtime__), - ), - lambda x: typing.cast( - types.SummarizedContent, x.cast_to(types, types, stream_types, False, __runtime__) - ), - ctx, + result, + lambda x: typing.cast(stream_types.SummarizedContent, x.cast_to(types, types, stream_types, True, __runtime__)), + lambda x: typing.cast(types.SummarizedContent, x.cast_to(types, types, stream_types, False, __runtime__)), + ctx, ) - + class BamlHttpRequestClient: __options: DoNotUseDirectlyCallManager @@ -407,128 +197,42 @@ class BamlHttpRequestClient: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def ExtractContentGraph( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractCategories(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="ExtractContentGraph", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="request", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractCategories", args={ + "content": content, + }, mode="request") return result - - def ExtractContentGraphGeneric( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractContentGraphGeneric(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="ExtractContentGraphGeneric", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="request", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraphGeneric", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }, mode="request") return result - - def ExtractContentGraphWithAnthropic( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractDynamicContentGraph(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="ExtractContentGraphWithAnthropic", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="request", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractDynamicContentGraph", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }, mode="request") return result - - def ExtractContentGraphWithEnvPrompt( - self, - content: str, - prompt_override: typing.Optional[str] = None, + def SummarizeCode(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="ExtractContentGraphWithEnvPrompt", - args={ - "content": content, - "prompt_override": prompt_override, - }, - mode="request", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SummarizeCode", args={ + "content": content, + }, mode="request") return result - - def SummarizeCode( - self, - content: str, + def SummarizeContent(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="SummarizeCode", - args={ - "content": content, - }, - mode="request", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SummarizeContent", args={ + "content": content, + }, mode="request") return result - - def SummarizeContent( - self, - content: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="SummarizeContent", - args={ - "content": content, - }, - mode="request", - ) - return result - + class BamlHttpStreamRequestClient: __options: DoNotUseDirectlyCallManager @@ -536,127 +240,41 @@ class BamlHttpStreamRequestClient: def __init__(self, options: DoNotUseDirectlyCallManager): self.__options = options - def ExtractContentGraph( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractCategories(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="ExtractContentGraph", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="stream", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractCategories", args={ + "content": content, + }, mode="stream") return result - - def ExtractContentGraphGeneric( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractContentGraphGeneric(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="ExtractContentGraphGeneric", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="stream", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraphGeneric", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }, mode="stream") return result - - def ExtractContentGraphWithAnthropic( - self, - content: str, - mode: typing.Optional[ - typing.Union[ - typing_extensions.Literal["simple"], - typing_extensions.Literal["base"], - typing_extensions.Literal["guided"], - typing_extensions.Literal["strict"], - typing_extensions.Literal["custom"], - ] - ] = None, - custom_prompt_content: typing.Optional[str] = None, + def ExtractDynamicContentGraph(self, content: str,mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None,custom_prompt_content: typing.Optional[str] = None, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="ExtractContentGraphWithAnthropic", - args={ - "content": content, - "mode": mode, - "custom_prompt_content": custom_prompt_content, - }, - mode="stream", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractDynamicContentGraph", args={ + "content": content,"mode": mode,"custom_prompt_content": custom_prompt_content, + }, mode="stream") return result - - def ExtractContentGraphWithEnvPrompt( - self, - content: str, - prompt_override: typing.Optional[str] = None, + def SummarizeCode(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="ExtractContentGraphWithEnvPrompt", - args={ - "content": content, - "prompt_override": prompt_override, - }, - mode="stream", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SummarizeCode", args={ + "content": content, + }, mode="stream") return result - - def SummarizeCode( - self, - content: str, + def SummarizeContent(self, content: str, baml_options: BamlCallOptions = {}, ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="SummarizeCode", - args={ - "content": content, - }, - mode="stream", - ) + result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="SummarizeContent", args={ + "content": content, + }, mode="stream") return result + - def SummarizeContent( - self, - content: str, - baml_options: BamlCallOptions = {}, - ) -> baml_py.baml_py.HTTPRequest: - result = self.__options.merge_options(baml_options).create_http_request_sync( - function_name="SummarizeContent", - args={ - "content": content, - }, - mode="stream", - ) - return result - - -b = BamlSyncClient(DoNotUseDirectlyCallManager({})) +b = BamlSyncClient(DoNotUseDirectlyCallManager({})) \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py index f925306f9..06725593c 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py @@ -14,13 +14,9 @@ from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX trace = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.trace_fn set_tags = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.upsert_tags - - def flush(): - DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.flush() - - + DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.flush() on_log_event = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.on_log_event -__all__ = ["trace", "set_tags", "flush", "on_log_event"] +__all__ = ['trace', 'set_tags', "flush", "on_log_event"] diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py index 5cdeb0c1b..62b687f71 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py @@ -15,45 +15,67 @@ from baml_py import type_builder from baml_py import baml_py from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME - class TypeBuilder(type_builder.TypeBuilder): def __init__(self): - super().__init__( - classes=set( - [ - "Edge", - "KnowledgeGraph", - "Node", - "SummarizedClass", - "SummarizedCode", - "SummarizedContent", - "SummarizedFunction", - ] - ), - enums=set([]), - runtime=DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME, - ) + super().__init__(classes=set( + ["AudioContent","ContentLabel","DefaultContentPrediction","DynamicKnowledgeGraph","Edge","ImageContent","KnowledgeGraph","Model3DContent","MultimediaContent","Node","ProceduralContent","SummarizedClass","SummarizedCode","SummarizedContent","SummarizedFunction","TextContent","VideoContent",] + ), enums=set( + [] + ), runtime=DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME) # ######################################################################### # Generated enums 0 # ######################################################################### + # ######################################################################### - # Generated classes 7 + # Generated classes 17 # ######################################################################### + @property + def AudioContent(self) -> "AudioContentViewer": + return AudioContentViewer(self) + + @property + def ContentLabel(self) -> "ContentLabelViewer": + return ContentLabelViewer(self) + + @property + def DefaultContentPrediction(self) -> "DefaultContentPredictionViewer": + return DefaultContentPredictionViewer(self) + + @property + def DynamicKnowledgeGraph(self) -> "DynamicKnowledgeGraphBuilder": + return DynamicKnowledgeGraphBuilder(self) + @property def Edge(self) -> "EdgeViewer": return EdgeViewer(self) + @property + def ImageContent(self) -> "ImageContentViewer": + return ImageContentViewer(self) + @property def KnowledgeGraph(self) -> "KnowledgeGraphViewer": return KnowledgeGraphViewer(self) + @property + def Model3DContent(self) -> "Model3DContentViewer": + return Model3DContentViewer(self) + + @property + def MultimediaContent(self) -> "MultimediaContentViewer": + return MultimediaContentViewer(self) + @property def Node(self) -> "NodeBuilder": return NodeBuilder(self) + @property + def ProceduralContent(self) -> "ProceduralContentViewer": + return ProceduralContentViewer(self) + @property def SummarizedClass(self) -> "SummarizedClassViewer": return SummarizedClassViewer(self) @@ -70,6 +92,15 @@ class TypeBuilder(type_builder.TypeBuilder): def SummarizedFunction(self) -> "SummarizedFunctionViewer": return SummarizedFunctionViewer(self) + @property + def TextContent(self) -> "TextContentViewer": + return TextContentViewer(self) + + @property + def VideoContent(self) -> "VideoContentViewer": + return VideoContentViewer(self) + + # ######################################################################### # Generated enums 0 @@ -77,21 +108,189 @@ class TypeBuilder(type_builder.TypeBuilder): # ######################################################################### -# Generated classes 7 +# Generated classes 17 # ######################################################################### +class AudioContentAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("AudioContent") + self._properties: typing.Set[str] = set([ "type", "subclass", ]) + self._props = AudioContentProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "AudioContentProperties": + return self._props + + +class AudioContentViewer(AudioContentAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class AudioContentProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def type(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("type")) + + @property + def subclass(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("subclass")) + + + + +class ContentLabelAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("ContentLabel") + self._properties: typing.Set[str] = set([ "content_type", "type", "subclass", ]) + self._props = ContentLabelProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "ContentLabelProperties": + return self._props + + +class ContentLabelViewer(ContentLabelAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class ContentLabelProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def content_type(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("content_type")) + + @property + def type(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("type")) + + @property + def subclass(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("subclass")) + + + + +class DefaultContentPredictionAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("DefaultContentPrediction") + self._properties: typing.Set[str] = set([ "label", ]) + self._props = DefaultContentPredictionProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "DefaultContentPredictionProperties": + return self._props + + +class DefaultContentPredictionViewer(DefaultContentPredictionAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class DefaultContentPredictionProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def label(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("label")) + + + + +class DynamicKnowledgeGraphAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("DynamicKnowledgeGraph") + self._properties: typing.Set[str] = set([ ]) + self._props = DynamicKnowledgeGraphProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "DynamicKnowledgeGraphProperties": + return self._props + + +class DynamicKnowledgeGraphBuilder(DynamicKnowledgeGraphAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def add_property(self, name: str, type: baml_py.FieldType) -> baml_py.ClassPropertyBuilder: + if name in self._properties: + raise ValueError(f"Property {name} already exists.") + return self._bldr.property(name).type(type) + + def list_properties(self) -> typing.List[typing.Tuple[str, baml_py.ClassPropertyBuilder]]: + return [(name, self._bldr.property(name)) for name in self._properties] + + + + +class DynamicKnowledgeGraphProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + def __getattr__(self, name: str) -> baml_py.ClassPropertyBuilder: + if name not in self.__properties: + raise AttributeError(f"Property {name} not found.") + return self.__bldr.property(name) + + + + class EdgeAst: def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) + _tb = tb._tb # type: ignore (we know how to use this private attribute) self._bldr = _tb.class_("Edge") - self._properties: typing.Set[str] = set( - [ - "source_node_id", - "target_node_id", - "relationship_name", - ] - ) + self._properties: typing.Set[str] = set([ "source_node_id", "target_node_id", "relationship_name", ]) self._props = EdgeProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: @@ -106,41 +305,82 @@ class EdgeViewer(EdgeAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [ - (name, type_builder.ClassPropertyViewer(self._bldr.property(name))) - for name in self._properties - ] + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + class EdgeProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + @property def source_node_id(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("source_node_id")) - + @property def target_node_id(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("target_node_id")) - + @property def relationship_name(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("relationship_name")) + + + + +class ImageContentAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("ImageContent") + self._properties: typing.Set[str] = set([ "type", "subclass", ]) + self._props = ImageContentProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "ImageContentProperties": + return self._props + + +class ImageContentViewer(ImageContentAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class ImageContentProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def type(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("type")) + + @property + def subclass(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("subclass")) + + class KnowledgeGraphAst: def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) + _tb = tb._tb # type: ignore (we know how to use this private attribute) self._bldr = _tb.class_("KnowledgeGraph") - self._properties: typing.Set[str] = set( - [ - "nodes", - "edges", - ] - ) + self._properties: typing.Set[str] = set([ "nodes", "edges", ]) self._props = KnowledgeGraphProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: @@ -155,39 +395,121 @@ class KnowledgeGraphViewer(KnowledgeGraphAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [ - (name, type_builder.ClassPropertyViewer(self._bldr.property(name))) - for name in self._properties - ] + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + class KnowledgeGraphProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + @property def nodes(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("nodes")) - + @property def edges(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("edges")) + + + + +class Model3DContentAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("Model3DContent") + self._properties: typing.Set[str] = set([ "type", "subclass", ]) + self._props = Model3DContentProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "Model3DContentProperties": + return self._props + + +class Model3DContentViewer(Model3DContentAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class Model3DContentProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def type(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("type")) + + @property + def subclass(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("subclass")) + + + + +class MultimediaContentAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("MultimediaContent") + self._properties: typing.Set[str] = set([ "type", "subclass", ]) + self._props = MultimediaContentProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "MultimediaContentProperties": + return self._props + + +class MultimediaContentViewer(MultimediaContentAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class MultimediaContentProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def type(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("type")) + + @property + def subclass(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("subclass")) + + class NodeAst: def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) + _tb = tb._tb # type: ignore (we know how to use this private attribute) self._bldr = _tb.class_("Node") - self._properties: typing.Set[str] = set( - [ - "id", - "name", - "type", - "description", - ] - ) + self._properties: typing.Set[str] = set([ "id", "name", "type", "description", ]) self._props = NodeProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: @@ -202,6 +524,7 @@ class NodeBuilder(NodeAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) + def add_property(self, name: str, type: baml_py.FieldType) -> baml_py.ClassPropertyBuilder: if name in self._properties: raise ValueError(f"Property {name} already exists.") @@ -210,46 +533,88 @@ class NodeBuilder(NodeAst): def list_properties(self) -> typing.List[typing.Tuple[str, baml_py.ClassPropertyBuilder]]: return [(name, self._bldr.property(name)) for name in self._properties] + + class NodeProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + def __getattr__(self, name: str) -> baml_py.ClassPropertyBuilder: if name not in self.__properties: raise AttributeError(f"Property {name} not found.") return self.__bldr.property(name) + @property def id(self) -> baml_py.ClassPropertyBuilder: return self.__bldr.property("id") - + @property def name(self) -> baml_py.ClassPropertyBuilder: return self.__bldr.property("name") - + @property def type(self) -> baml_py.ClassPropertyBuilder: return self.__bldr.property("type") - + @property def description(self) -> baml_py.ClassPropertyBuilder: return self.__bldr.property("description") + + + + +class ProceduralContentAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("ProceduralContent") + self._properties: typing.Set[str] = set([ "type", "subclass", ]) + self._props = ProceduralContentProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "ProceduralContentProperties": + return self._props + + +class ProceduralContentViewer(ProceduralContentAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class ProceduralContentProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def type(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("type")) + + @property + def subclass(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("subclass")) + + class SummarizedClassAst: def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) + _tb = tb._tb # type: ignore (we know how to use this private attribute) self._bldr = _tb.class_("SummarizedClass") - self._properties: typing.Set[str] = set( - [ - "name", - "description", - "methods", - "decorators", - ] - ) + self._properties: typing.Set[str] = set([ "name", "description", "methods", "decorators", ]) self._props = SummarizedClassProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: @@ -264,50 +629,43 @@ class SummarizedClassViewer(SummarizedClassAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [ - (name, type_builder.ClassPropertyViewer(self._bldr.property(name))) - for name in self._properties - ] + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + class SummarizedClassProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + @property def name(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("name")) - + @property def description(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("description")) - + @property def methods(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("methods")) - + @property def decorators(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("decorators")) + + class SummarizedCodeAst: def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) + _tb = tb._tb # type: ignore (we know how to use this private attribute) self._bldr = _tb.class_("SummarizedCode") - self._properties: typing.Set[str] = set( - [ - "high_level_summary", - "key_features", - "imports", - "constants", - "classes", - "functions", - "workflow_description", - ] - ) + self._properties: typing.Set[str] = set([ "high_level_summary", "key_features", "imports", "constants", "classes", "functions", "workflow_description", ]) self._props = SummarizedCodeProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: @@ -322,57 +680,55 @@ class SummarizedCodeViewer(SummarizedCodeAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [ - (name, type_builder.ClassPropertyViewer(self._bldr.property(name))) - for name in self._properties - ] + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + class SummarizedCodeProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + @property def high_level_summary(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("high_level_summary")) - + @property def key_features(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("key_features")) - + @property def imports(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("imports")) - + @property def constants(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("constants")) - + @property def classes(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("classes")) - + @property def functions(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("functions")) - + @property def workflow_description(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("workflow_description")) + + class SummarizedContentAst: def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) + _tb = tb._tb # type: ignore (we know how to use this private attribute) self._bldr = _tb.class_("SummarizedContent") - self._properties: typing.Set[str] = set( - [ - "summary", - "description", - ] - ) + self._properties: typing.Set[str] = set([ "summary", "description", ]) self._props = SummarizedContentProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: @@ -387,40 +743,35 @@ class SummarizedContentViewer(SummarizedContentAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [ - (name, type_builder.ClassPropertyViewer(self._bldr.property(name))) - for name in self._properties - ] + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + class SummarizedContentProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + @property def summary(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("summary")) - + @property def description(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("description")) + + class SummarizedFunctionAst: def __init__(self, tb: type_builder.TypeBuilder): - _tb = tb._tb # type: ignore (we know how to use this private attribute) + _tb = tb._tb # type: ignore (we know how to use this private attribute) self._bldr = _tb.class_("SummarizedFunction") - self._properties: typing.Set[str] = set( - [ - "name", - "description", - "inputs", - "outputs", - "decorators", - ] - ) + self._properties: typing.Set[str] = set([ "name", "description", "inputs", "outputs", "decorators", ]) self._props = SummarizedFunctionProperties(self._bldr, self._properties) def type(self) -> baml_py.FieldType: @@ -435,34 +786,124 @@ class SummarizedFunctionViewer(SummarizedFunctionAst): def __init__(self, tb: type_builder.TypeBuilder): super().__init__(tb) + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: - return [ - (name, type_builder.ClassPropertyViewer(self._bldr.property(name))) - for name in self._properties - ] + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + class SummarizedFunctionProperties: def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): self.__bldr = bldr - self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + @property def name(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("name")) - + @property def description(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("description")) - + @property def inputs(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("inputs")) - + @property def outputs(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("outputs")) - + @property def decorators(self) -> type_builder.ClassPropertyViewer: return type_builder.ClassPropertyViewer(self.__bldr.property("decorators")) + + + + +class TextContentAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("TextContent") + self._properties: typing.Set[str] = set([ "type", "subclass", ]) + self._props = TextContentProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "TextContentProperties": + return self._props + + +class TextContentViewer(TextContentAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class TextContentProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def type(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("type")) + + @property + def subclass(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("subclass")) + + + + +class VideoContentAst: + def __init__(self, tb: type_builder.TypeBuilder): + _tb = tb._tb # type: ignore (we know how to use this private attribute) + self._bldr = _tb.class_("VideoContent") + self._properties: typing.Set[str] = set([ "type", "subclass", ]) + self._props = VideoContentProperties(self._bldr, self._properties) + + def type(self) -> baml_py.FieldType: + return self._bldr.field() + + @property + def props(self) -> "VideoContentProperties": + return self._props + + +class VideoContentViewer(VideoContentAst): + def __init__(self, tb: type_builder.TypeBuilder): + super().__init__(tb) + + + def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]: + return [(name, type_builder.ClassPropertyViewer(self._bldr.property(name))) for name in self._properties] + + + +class VideoContentProperties: + def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]): + self.__bldr = bldr + self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821 + + + + @property + def type(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("type")) + + @property + def subclass(self) -> type_builder.ClassPropertyViewer: + return type_builder.ClassPropertyViewer(self.__bldr.property("subclass")) + + + diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py index 09dfb30cb..6e6866109 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py @@ -15,18 +15,57 @@ from . import stream_types type_map = { + + "types.AudioContent": types.AudioContent, + "stream_types.AudioContent": stream_types.AudioContent, + + "types.ContentLabel": types.ContentLabel, + "stream_types.ContentLabel": stream_types.ContentLabel, + + "types.DefaultContentPrediction": types.DefaultContentPrediction, + "stream_types.DefaultContentPrediction": stream_types.DefaultContentPrediction, + + "types.DynamicKnowledgeGraph": types.DynamicKnowledgeGraph, + "stream_types.DynamicKnowledgeGraph": stream_types.DynamicKnowledgeGraph, + "types.Edge": types.Edge, "stream_types.Edge": stream_types.Edge, + + "types.ImageContent": types.ImageContent, + "stream_types.ImageContent": stream_types.ImageContent, + "types.KnowledgeGraph": types.KnowledgeGraph, "stream_types.KnowledgeGraph": stream_types.KnowledgeGraph, + + "types.Model3DContent": types.Model3DContent, + "stream_types.Model3DContent": stream_types.Model3DContent, + + "types.MultimediaContent": types.MultimediaContent, + "stream_types.MultimediaContent": stream_types.MultimediaContent, + "types.Node": types.Node, "stream_types.Node": stream_types.Node, + + "types.ProceduralContent": types.ProceduralContent, + "stream_types.ProceduralContent": stream_types.ProceduralContent, + "types.SummarizedClass": types.SummarizedClass, "stream_types.SummarizedClass": stream_types.SummarizedClass, + "types.SummarizedCode": types.SummarizedCode, "stream_types.SummarizedCode": stream_types.SummarizedCode, + "types.SummarizedContent": types.SummarizedContent, "stream_types.SummarizedContent": stream_types.SummarizedContent, + "types.SummarizedFunction": types.SummarizedFunction, "stream_types.SummarizedFunction": stream_types.SummarizedFunction, -} + + "types.TextContent": types.TextContent, + "stream_types.TextContent": stream_types.TextContent, + + "types.VideoContent": types.VideoContent, + "stream_types.VideoContent": stream_types.VideoContent, + + +} \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py index f7b8b8f1b..a3d0ea358 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py @@ -20,37 +20,44 @@ from pydantic import BaseModel, ConfigDict import baml_py -CheckT = typing_extensions.TypeVar("CheckT") -CheckName = typing_extensions.TypeVar("CheckName", bound=str) - +CheckT = typing_extensions.TypeVar('CheckT') +CheckName = typing_extensions.TypeVar('CheckName', bound=str) class Check(BaseModel): name: str expression: str status: str - - class Checked(BaseModel, typing.Generic[CheckT, CheckName]): value: CheckT checks: typing.Dict[CheckName, Check] - def get_checks(checks: typing.Dict[CheckName, Check]) -> typing.List[Check]: return list(checks.values()) - def all_succeeded(checks: typing.Dict[CheckName, Check]) -> bool: return all(check.status == "succeeded" for check in get_checks(checks)) - - # ######################################################################### # Generated enums (0) # ######################################################################### # ######################################################################### -# Generated classes (7) +# Generated classes (17) # ######################################################################### +class AudioContent(BaseModel): + type: str + subclass: typing.List[str] + +class ContentLabel(BaseModel): + content_type: typing.Union[typing_extensions.Literal['text'], typing_extensions.Literal['audio'], typing_extensions.Literal['image'], typing_extensions.Literal['video'], typing_extensions.Literal['multimedia'], typing_extensions.Literal['3d_model'], typing_extensions.Literal['procedural']] + type: str + subclass: typing.List[str] + +class DefaultContentPrediction(BaseModel): + label: "ContentLabel" + +class DynamicKnowledgeGraph(BaseModel): + model_config = ConfigDict(extra='allow') class Edge(BaseModel): # doc string for edge @@ -60,19 +67,32 @@ class Edge(BaseModel): target_node_id: str relationship_name: str +class ImageContent(BaseModel): + type: str + subclass: typing.List[str] class KnowledgeGraph(BaseModel): nodes: typing.List["Node"] edges: typing.List["Edge"] +class Model3DContent(BaseModel): + type: str + subclass: typing.List[str] + +class MultimediaContent(BaseModel): + type: str + subclass: typing.List[str] class Node(BaseModel): - model_config = ConfigDict(extra="allow") + model_config = ConfigDict(extra='allow') id: str name: str type: str description: str +class ProceduralContent(BaseModel): + type: str + subclass: typing.List[str] class SummarizedClass(BaseModel): name: str @@ -80,7 +100,6 @@ class SummarizedClass(BaseModel): methods: typing.Optional[typing.List["SummarizedFunction"]] = None decorators: typing.Optional[typing.List[str]] = None - class SummarizedCode(BaseModel): high_level_summary: str key_features: typing.List[str] @@ -90,12 +109,10 @@ class SummarizedCode(BaseModel): functions: typing.List["SummarizedFunction"] workflow_description: typing.Optional[str] = None - class SummarizedContent(BaseModel): summary: str description: str - class SummarizedFunction(BaseModel): name: str description: str @@ -103,6 +120,13 @@ class SummarizedFunction(BaseModel): outputs: typing.Optional[typing.List[str]] = None decorators: typing.Optional[typing.List[str]] = None +class TextContent(BaseModel): + type: str + subclass: typing.List[str] + +class VideoContent(BaseModel): + type: str + subclass: typing.List[str] # ######################################################################### # Generated type aliases (0) diff --git a/cognee/infrastructure/llm/structured_output_framework/baml_src/extract_categories.baml b/cognee/infrastructure/llm/structured_output_framework/baml_src/extract_categories.baml index e69de29bb..c718b754e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml_src/extract_categories.baml +++ b/cognee/infrastructure/llm/structured_output_framework/baml_src/extract_categories.baml @@ -0,0 +1,109 @@ +// Content classification data models - matching shared/data_models.py +class TextContent { + type string + subclass string[] +} + +class AudioContent { + type string + subclass string[] +} + +class ImageContent { + type string + subclass string[] +} + +class VideoContent { + type string + subclass string[] +} + +class MultimediaContent { + type string + subclass string[] +} + +class Model3DContent { + type string + subclass string[] +} + +class ProceduralContent { + type string + subclass string[] +} + +class ContentLabel { + content_type "text" | "audio" | "image" | "video" | "multimedia" | "3d_model" | "procedural" + type string + subclass string[] +} + +class DefaultContentPrediction { + label ContentLabel +} + +// Content classification prompt template +template_string ClassifyContentPrompt() #" + You are a classification engine and should classify content. Make sure to use one of the existing classification options and not invent your own. + + Classify the content into one of these main categories and their relevant subclasses: + + **TEXT CONTENT** (content_type: "text"): + - type: "TEXTUAL_DOCUMENTS_USED_FOR_GENERAL_PURPOSES" + - subclass options: ["Articles, essays, and reports", "Books and manuscripts", "News stories and blog posts", "Research papers and academic publications", "Social media posts and comments", "Website content and product descriptions", "Personal narratives and stories", "Spreadsheets and tables", "Forms and surveys", "Databases and CSV files", "Source code in various programming languages", "Shell commands and scripts", "Markup languages (HTML, XML)", "Stylesheets (CSS) and configuration files (YAML, JSON, INI)", "Chat transcripts and messaging history", "Customer service logs and interactions", "Conversational AI training data", "Textbook content and lecture notes", "Exam questions and academic exercises", "E-learning course materials", "Poetry and prose", "Scripts for plays, movies, and television", "Song lyrics", "Manuals and user guides", "Technical specifications and API documentation", "Helpdesk articles and FAQs", "Contracts and agreements", "Laws, regulations, and legal case documents", "Policy documents and compliance materials", "Clinical trial reports", "Patient records and case notes", "Scientific journal articles", "Financial reports and statements", "Business plans and proposals", "Market research and analysis reports", "Ad copies and marketing slogans", "Product catalogs and brochures", "Press releases and promotional content", "Professional and formal correspondence", "Personal emails and letters", "Image and video captions", "Annotations and metadata for various media", "Vocabulary lists and grammar rules", "Language exercises and quizzes", "Other types of text data"] + + **AUDIO CONTENT** (content_type: "audio"): + - type: "AUDIO_DOCUMENTS_USED_FOR_GENERAL_PURPOSES" + - subclass options: ["Music tracks and albums", "Podcasts and radio broadcasts", "Audiobooks and audio guides", "Recorded interviews and speeches", "Sound effects and ambient sounds", "Other types of audio recordings"] + + **IMAGE CONTENT** (content_type: "image"): + - type: "IMAGE_DOCUMENTS_USED_FOR_GENERAL_PURPOSES" + - subclass options: ["Photographs and digital images", "Illustrations, diagrams, and charts", "Infographics and visual data representations", "Artwork and paintings", "Screenshots and graphical user interfaces", "Other types of images"] + + **VIDEO CONTENT** (content_type: "video"): + - type: "VIDEO_DOCUMENTS_USED_FOR_GENERAL_PURPOSES" + - subclass options: ["Movies and short films", "Documentaries and educational videos", "Video tutorials and how-to guides", "Animated features and cartoons", "Live event recordings and sports broadcasts", "Other types of video content"] + + **MULTIMEDIA CONTENT** (content_type: "multimedia"): + - type: "MULTIMEDIA_DOCUMENTS_USED_FOR_GENERAL_PURPOSES" + - subclass options: ["Interactive web content and games", "Virtual reality (VR) and augmented reality (AR) experiences", "Mixed media presentations and slide decks", "E-learning modules with integrated multimedia", "Digital exhibitions and virtual tours", "Other types of multimedia content"] + + **3D MODEL CONTENT** (content_type: "3d_model"): + - type: "3D_MODEL_DOCUMENTS_USED_FOR_GENERAL_PURPOSES" + - subclass options: ["Architectural renderings and building plans", "Product design models and prototypes", "3D animations and character models", "Scientific simulations and visualizations", "Virtual objects for AR/VR applications", "Other types of 3D models"] + + **PROCEDURAL CONTENT** (content_type: "procedural"): + - type: "PROCEDURAL_DOCUMENTS_USED_FOR_GENERAL_PURPOSES" + - subclass options: ["Tutorials and step-by-step guides", "Workflow and process descriptions", "Simulation and training exercises", "Recipes and crafting instructions", "Other types of procedural content"] + + Select the most appropriate content_type, type, and relevant subclasses. +"# + +// OpenAI client defined once for all BAML files + +// Classification function +function ExtractCategories(content: string) -> DefaultContentPrediction { + client OpenAI + + prompt #" + {{ ClassifyContentPrompt() }} + + {{ ctx.output_format(prefix="Answer in this schema:\n") }} + + {{ _.role('user') }} + {{ content }} + "# +} + +// Test case for classification +test ExtractCategoriesExample { + functions [ExtractCategories] + args { + content #" + Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval. + It deals with the interaction between computers and human language, in particular how to program computers to process and analyze large amounts of natural language data. + "# + } +} diff --git a/cognee/infrastructure/llm/structured_output_framework/baml_src/extract_content_graph.baml b/cognee/infrastructure/llm/structured_output_framework/baml_src/extract_content_graph.baml index ca5f9981b..5b500f12e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml_src/extract_content_graph.baml +++ b/cognee/infrastructure/llm/structured_output_framework/baml_src/extract_content_graph.baml @@ -50,6 +50,11 @@ class SummarizedCode { workflow_description string? } +class DynamicKnowledgeGraph { + @@dynamic +} + + // Simple template for basic extraction (fast, good quality) template_string ExtractContentGraphPrompt() #" You are an advanced algorithm that extracts structured data into a knowledge graph. @@ -219,31 +224,15 @@ template_string StrictExtractContentGraphPrompt() #" "# // OpenAI client with environment model selection -client OpenAIClientWithEnvModel { +client OpenAI { provider openai options { - model env.LLM_MODEL - api_key env.OPENAI_API_KEY + model client_registry.model + api_key client_registry.api_key } } -// Anthropic client with environment model selection -client AnthropicClientWithEnvModel { - provider anthropic - options { - model env.LLM_MODEL - api_key env.ANTHROPIC_API_KEY - } -} -// Default client (maintains backward compatibility) -client DefaultClient { - provider openai - options { - model "gpt-4o-mini" - api_key env.OPENAI_API_KEY - } -} // Function that returns raw structured output (for custom objects - to be handled in Python) function ExtractContentGraphGeneric( @@ -251,7 +240,7 @@ function ExtractContentGraphGeneric( mode: "simple" | "base" | "guided" | "strict" | "custom"?, custom_prompt_content: string? ) -> KnowledgeGraph { - client OpenAIClientWithEnvModel + client OpenAI prompt #" {% if mode == "base" %} @@ -281,12 +270,12 @@ function ExtractContentGraphGeneric( } // Backward-compatible function specifically for KnowledgeGraph -function ExtractContentGraph( +function ExtractDynamicContentGraph( content: string, mode: "simple" | "base" | "guided" | "strict" | "custom"?, custom_prompt_content: string? -) -> KnowledgeGraph { - client OpenAIClientWithEnvModel +) -> DynamicKnowledgeGraph { + client OpenAI prompt #" {% if mode == "base" %} @@ -315,72 +304,10 @@ function ExtractContentGraph( "# } -// Alternative function that uses environment variable for prompt selection -function ExtractContentGraphWithEnvPrompt( - content: string, - prompt_override: string? -) -> KnowledgeGraph { - client OpenAIClientWithEnvModel - - prompt #" - {% if prompt_override %} - {{ prompt_override }} - {% else %} - {{ ExtractContentGraphPrompt() }} - {% endif %} - - {{ ctx.output_format(prefix="Answer in this schema:\n") }} - - Before answering, briefly describe what you'll extract from the text, then provide the structured output. - - Example format: - I'll extract the main entities and their relationships from this text... - - { ... } - - {{ _.role('user') }} - {{ content }} - "# -} - -// Function that uses Anthropic client -function ExtractContentGraphWithAnthropic( - content: string, - mode: "simple" | "base" | "guided" | "strict" | "custom"?, - custom_prompt_content: string? -) -> KnowledgeGraph { - client AnthropicClientWithEnvModel - - prompt #" - {% if mode == "base" %} - {{ DetailedExtractContentGraphPrompt() }} - {% elif mode == "guided" %} - {{ GuidedExtractContentGraphPrompt() }} - {% elif mode == "strict" %} - {{ StrictExtractContentGraphPrompt() }} - {% elif mode == "custom" and custom_prompt_content %} - {{ custom_prompt_content }} - {% else %} - {{ ExtractContentGraphPrompt() }} - {% endif %} - - {{ ctx.output_format(prefix="Answer in this schema:\n") }} - - Before answering, briefly describe what you'll extract from the text, then provide the structured output. - - Example format: - I'll extract the main entities and their relationships from this text... - - { ... } - - {{ _.role('user') }} - {{ content }} - "# -} // Summarization functions function SummarizeContent(content: string) -> SummarizedContent { - client OpenAIClientWithEnvModel + client OpenAI prompt #" {{ SummarizeContentPrompt() }} @@ -393,7 +320,7 @@ function SummarizeContent(content: string) -> SummarizedContent { } function SummarizeCode(content: string) -> SummarizedCode { - client OpenAIClientWithEnvModel + client OpenAI prompt #" {{ SummarizeCodePrompt() }} @@ -405,71 +332,12 @@ function SummarizeCode(content: string) -> SummarizedCode { "# } -test ExtractPersonExample { - functions [ExtractContentGraph] - args { - content #" - My name is Vasiliy. I was born in 1992. I am a software engineer. I work at Google and am based in Berlin. - "# - mode "simple" - } -} - -test ExtractGuidedExample { - functions [ExtractContentGraph] - args { - content #" - Apple Inc. was founded by Steve Jobs in 1976. The company is headquartered in Cupertino, California. - Tim Cook is the current CEO of Apple Inc. - "# - mode "guided" - } -} - test ExtractStrictExample { - functions [ExtractContentGraph] + functions [ExtractContentGraphGeneric] args { content #" The Python programming language was created by Guido van Rossum in 1991. "# mode "strict" } -} - -test ExtractGenericExample { - functions [ExtractContentGraphGeneric] - args { - content #" - React is a JavaScript library for building user interfaces, developed by Facebook. - "# - mode "simple" - } -} - -test SummarizeContentExample { - functions [SummarizeContent] - args { - content #" - Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval. - It deals with the interaction between computers and human language, in particular how to program computers to process and analyze large amounts of natural language data. - "# - } -} - -test SummarizeCodeExample { - functions [SummarizeCode] - args { - content #" - def fibonacci(n): - if n <= 1: - return n - return fibonacci(n-1) + fibonacci(n-2) - - def main(): - print(fibonacci(10)) - - if __name__ == "__main__": - main() - "# - } -} +} \ No newline at end of file diff --git a/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/extract_categories.py b/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/extract_categories.py index e69de29bb..02397d956 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/extract_categories.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/extract_categories.py @@ -0,0 +1,113 @@ +import os +from typing import Type +from pydantic import BaseModel +from cognee.infrastructure.llm.structured_output_framework.baml_src.config import get_llm_config + +config = get_llm_config() +from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b +from cognee.shared.data_models import SummarizedCode +from cognee.shared.logging_utils import get_logger +from baml_py import ClientRegistry + +logger = get_logger("extract_summary_baml") + + +def get_mock_summarized_code(): + """Local mock function to avoid circular imports.""" + return SummarizedCode( + high_level_summary="Mock code summary", + key_features=["Mock feature 1", "Mock feature 2"], + imports=["mock_import"], + constants=["MOCK_CONSTANT"], + classes=[], + functions=[], + workflow_description="Mock workflow description", + ) + + +async def extract_summary(content: str, response_model: Type[BaseModel]): + """ + Extract summary using BAML framework. + + Args: + content: The content to summarize + response_model: The Pydantic model type for the response + + Returns: + BaseModel: The summarized content in the specified format + """ + config = get_llm_config() + + baml_registry = ClientRegistry() + + baml_registry.add_llm_client( + name="extract_category_client", + provider=config.llm_model, + options={ + "model": config.llm_model, + "temperature": config.llm_temperature, + "api_key": config.llm_api_key, + }, + ) + baml_registry.set_primary("extract_category_client") + + # Use BAML's SummarizeContent function + summary_result = await b.SummarizeContent( + content, baml_options={"client_registry": baml_registry} + ) + + # Convert BAML result to the expected response model + if response_model is SummarizedCode: + # If it's asking for SummarizedCode but we got SummarizedContent, + # we need to use SummarizeCode instead + code_result = await b.SummarizeCode( + content, baml_options={"client_registry": baml_registry} + ) + return code_result + else: + # For other models, return the summary result + return summary_result + + +async def extract_code_summary(content: str): + """ + Extract code summary using BAML framework with mocking support. + + Args: + content: The code content to summarize + + Returns: + SummarizedCode: The summarized code information + """ + enable_mocking = os.getenv("MOCK_CODE_SUMMARY", "false") + if isinstance(enable_mocking, bool): + enable_mocking = str(enable_mocking).lower() + enable_mocking = enable_mocking in ("true", "1", "yes") + + if enable_mocking: + result = get_mock_summarized_code() + return result + else: + try: + config = get_llm_config() + + baml_registry = ClientRegistry() + + baml_registry.add_llm_client( + name="extract_content_category", + provider=config.llm_provider, + options={ + "model": config.llm_model, + "temperature": config.llm_temperature, + "api_key": config.llm_api_key, + }, + ) + baml_registry.set_primary("extract_content_category") + result = await b.SummarizeCode(content, baml_options={"client_registry": baml_registry}) + except Exception as e: + logger.error( + "Failed to extract code summary with BAML, falling back to mock summary", exc_info=e + ) + result = get_mock_summarized_code() + + return result diff --git a/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/extract_summary.py b/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/extract_summary.py index 988569565..c35b3f69c 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/extract_summary.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/extract_summary.py @@ -1,11 +1,13 @@ import os from typing import Type from pydantic import BaseModel +from cognee.infrastructure.llm.structured_output_framework.baml_src.config import get_llm_config +config = get_llm_config() from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b from cognee.infrastructure.llm.structured_output_framework.baml_src.config import get_llm_config from cognee.shared.data_models import SummarizedCode from cognee.shared.logging_utils import get_logger - +from baml_py import ClientRegistry logger = get_logger("extract_summary_baml") @@ -35,9 +37,21 @@ async def extract_summary(content: str, response_model: Type[BaseModel]): """ config = get_llm_config() + baml_registry = ClientRegistry() + + baml_registry.add_llm_client( + name="def", + provider="openai", + options={ + "model": config.llm_model, + "temperature": config.llm_temperature, + "api_key": config.llm_api_key + }) + baml_registry.set_primary('def') + # Use BAML's SummarizeContent function summary_result = await b.SummarizeContent( - content, baml_options={"client_registry": config.baml_registry} + content, baml_options={"client_registry": baml_registry} ) # Convert BAML result to the expected response model @@ -74,8 +88,20 @@ async def extract_code_summary(content: str): else: try: config = get_llm_config() + + baml_registry = ClientRegistry() + + baml_registry.add_llm_client( + name="def", + provider="openai", + options={ + "model": config.llm_model, + "temperature": config.llm_temperature, + "api_key": config.llm_api_key + }) + baml_registry.set_primary('def') result = await b.SummarizeCode( - content, baml_options={"client_registry": config.baml_registry} + content, baml_options={"client_registry": baml_registry} ) except Exception as e: logger.error( diff --git a/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/knowledge_graph/extract_content_graph.py b/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/knowledge_graph/extract_content_graph.py index 9afc51854..978c5711e 100644 --- a/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/knowledge_graph/extract_content_graph.py +++ b/cognee/infrastructure/llm/structured_output_framework/baml_src/extraction/knowledge_graph/extract_content_graph.py @@ -1,26 +1,50 @@ import os from typing import Type from pydantic import BaseModel -from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b -from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.type_builder import ( - TypeBuilder, -) from cognee.infrastructure.llm.structured_output_framework.baml_src.config import get_llm_config +config = get_llm_config() +from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b from cognee.shared.logging_utils import get_logger, setup_logging +from baml_py import ClientRegistry -async def extract_content_graph(content: str, response_model: Type[BaseModel]): - # tb = TypeBuilder() +async def extract_content_graph( + content: str, response_model: Type[BaseModel], mode: str = "simple" +): config = get_llm_config() setup_logging() - get_logger(level="INFO") - # country = tb.union \ - # ([tb.literal_string("USA"), tb.literal_string("UK"), tb.literal_string("Germany"), tb.literal_string("other")]) - # tb.Node.add_property("country", country) + get_logger(level="INFO") + + baml_registry = ClientRegistry() + + baml_registry.add_llm_client( + name="extract_content_client", + provider=config.llm_provider, + options={ + "model": config.llm_model, + "temperature": config.llm_temperature, + "api_key": config.llm_api_key, + }, + ) + baml_registry.set_primary("extract_content_client") + + # if response_model: + # # tb = TypeBuilder() + # # country = tb.union \ + # # ([tb.literal_string("USA"), tb.literal_string("UK"), tb.literal_string("Germany"), tb.literal_string("other")]) + # # tb.Node.add_property("country", country) + # + # graph = await b.ExtractDynamicContentGraph( + # content, mode=mode, baml_options={"client_registry": baml_registry} + # ) + # + # return graph + + # else: graph = await b.ExtractContentGraph( - content, mode="simple", baml_options={"client_registry": config.baml_registry} + content, mode=mode, baml_options={"client_registry": baml_registry} ) return graph