first draft of BAML

This commit is contained in:
vasilije 2025-07-06 12:09:19 +02:00
parent c936f5e0a3
commit a7ce271e74
138 changed files with 2134 additions and 123 deletions

View file

@ -2,9 +2,9 @@ from uuid import NAMESPACE_OID, uuid5
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.vector import get_vector_engine
from cognee.infrastructure.llm.prompts import render_prompt
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.prompts import render_prompt
from cognee.low_level import DataPoint
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.get_llm_client import get_llm_client
from cognee.shared.logging_utils import get_logger
from cognee.modules.engine.models import NodeSet
from cognee.tasks.storage import add_data_points, index_graph_edges

View file

@ -7,7 +7,7 @@ from cognee.modules.cognify.config import get_cognify_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.config import get_llm_config
from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
from cognee.infrastructure.files.storage import LocalStorage

View file

@ -17,7 +17,7 @@ from cognee.api.v1.responses.models import (
)
from cognee.api.v1.responses.dispatch_function import dispatch_function
from cognee.api.v1.responses.default_tools import DEFAULT_TOOLS
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.config import get_llm_config
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user

View file

@ -9,6 +9,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
class BaseConfig(BaseSettings):
data_root_directory: str = get_absolute_path(".data_storage")
monitoring_tool: object = Observer.LANGFUSE
structured_output_framework: str = os.getenv("STRUCTURED_OUTPUT_FRAMEWORK")
graphistry_username: Optional[str] = os.getenv("GRAPHISTRY_USERNAME")
graphistry_password: Optional[str] = os.getenv("GRAPHISTRY_PASSWORD")
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")

View file

@ -1,8 +1,8 @@
from typing import Any, Dict, List
from pydantic import BaseModel
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.get_llm_client import get_llm_client
from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.prompts import read_query_prompt, render_prompt
from cognee.eval_framework.eval_config import EvalConfig

View file

@ -5,7 +5,7 @@ import litellm
import os
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException
from cognee.infrastructure.llm.tokenizer.TikToken import TikTokenTokenizer
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.tokenizer.TikToken import TikTokenTokenizer
litellm.set_verbose = False
logger = get_logger("FastembedEmbeddingEngine")

View file

@ -7,11 +7,11 @@ import litellm
import os
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException
from cognee.infrastructure.llm.tokenizer.Gemini import GeminiTokenizer
from cognee.infrastructure.llm.tokenizer.HuggingFace import HuggingFaceTokenizer
from cognee.infrastructure.llm.tokenizer.Mistral import MistralTokenizer
from cognee.infrastructure.llm.tokenizer.TikToken import TikTokenTokenizer
from cognee.infrastructure.llm.embedding_rate_limiter import (
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.tokenizer.Gemini import GeminiTokenizer
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.tokenizer.HuggingFace import HuggingFaceTokenizer
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.tokenizer import MistralTokenizer
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.tokenizer.TikToken import TikTokenTokenizer
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.embedding_rate_limiter import (
embedding_rate_limit_async,
embedding_sleep_and_retry_async,
)

View file

@ -7,9 +7,8 @@ import os
import aiohttp.http_exceptions
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
from cognee.infrastructure.databases.exceptions.EmbeddingException import EmbeddingException
from cognee.infrastructure.llm.tokenizer.HuggingFace import HuggingFaceTokenizer
from cognee.infrastructure.llm.embedding_rate_limiter import (
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.tokenizer.HuggingFace import HuggingFaceTokenizer
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.embedding_rate_limiter import (
embedding_rate_limit_async,
embedding_sleep_and_retry_async,
)

View file

@ -1,5 +1,5 @@
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.config import get_llm_config
from .EmbeddingEngine import EmbeddingEngine
from functools import lru_cache

View file

@ -1,4 +1,4 @@
from .config import get_llm_config
from .utils import get_max_chunk_tokens
from .utils import test_llm_connection
from .utils import test_embedding_connection
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.config import get_llm_config
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.utils import get_max_chunk_tokens
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.utils import test_llm_connection
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.utils import test_embedding_connection

View file

@ -0,0 +1,58 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
__version__ = "0.201.0"
try:
from baml_py.safe_import import EnsureBamlPyImport
except ImportError:
raise ImportError(f"""Update to baml-py required.
Version of baml_client generator (see generators.baml): {__version__}
Please upgrade baml-py to version "{__version__}".
$ pip install baml-py=={__version__}
$ uv add baml-py=={__version__}
If nothing else works, please ask for help:
https://github.com/boundaryml/baml/issues
https://boundaryml.com/discord
""") from None
with EnsureBamlPyImport(__version__) as e:
e.raise_if_incompatible_version(__version__)
from . import types
from . import tracing
from . import stream_types
from . import config
from .config import reset_baml_env_vars
from .sync_client import b
# FOR LEGACY COMPATIBILITY, expose "partial_types" as an alias for "stream_types"
# WE RECOMMEND USERS TO USE "stream_types" INSTEAD
partial_types = stream_types
__all__ = [
"b",
"stream_types",
"partial_types",
"tracing",
"types",
"reset_baml_env_vars",
"config",
]

View file

@ -0,0 +1,235 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
import typing
import typing_extensions
import baml_py
from . import stream_types, types, type_builder
from .parser import LlmResponseParser, LlmStreamParser
from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions
from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__
class BamlAsyncClient:
    """Asynchronous client over the generated BAML functions.

    Each generated function is an awaitable method returning a parsed
    ``types.KnowledgeGraph``.  The ``stream``, ``request``, ``stream_request``,
    ``parse`` and ``parse_stream`` properties expose the streaming,
    raw-HTTP-request and response-parsing views of the same functions.
    """

    __options: DoNotUseDirectlyCallManager
    __stream_client: "BamlStreamClient"
    __http_request: "BamlHttpRequestClient"
    __http_stream_request: "BamlHttpStreamRequestClient"
    __llm_response_parser: LlmResponseParser
    __llm_stream_parser: LlmStreamParser

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options
        self.__stream_client = BamlStreamClient(options)
        self.__http_request = BamlHttpRequestClient(options)
        self.__http_stream_request = BamlHttpStreamRequestClient(options)
        self.__llm_response_parser = LlmResponseParser(options)
        self.__llm_stream_parser = LlmStreamParser(options)

    def with_options(self,
        tb: typing.Optional[type_builder.TypeBuilder] = None,
        client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None,
        collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None,
        env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None,
    ) -> "BamlAsyncClient":
        """Return a new client with the given call options layered on top."""
        overrides: BamlCallOptions = {}
        for key, value in (
            ("tb", tb),
            ("client_registry", client_registry),
            ("collector", collector),
            ("env", env),
        ):
            if value is not None:
                overrides[key] = value  # type: ignore[literal-required]
        return BamlAsyncClient(self.__options.merge_options(overrides))

    @property
    def stream(self):
        """Streaming variants of the generated functions."""
        return self.__stream_client

    @property
    def request(self):
        """Raw HTTP-request builders for the generated functions."""
        return self.__http_request

    @property
    def stream_request(self):
        """Raw HTTP-request builders for the streaming variants."""
        return self.__http_stream_request

    @property
    def parse(self):
        """Parsers for complete (non-streaming) LLM responses."""
        return self.__llm_response_parser

    @property
    def parse_stream(self):
        """Parsers for partial (streaming) LLM responses."""
        return self.__llm_stream_parser

    async def __invoke(
        self,
        function_name: str,
        args: typing.Dict[str, typing.Any],
        baml_options: BamlCallOptions,
    ) -> types.KnowledgeGraph:
        # Merge per-call options over the client defaults, dispatch the call,
        # then cast the runtime result into the generated KnowledgeGraph type.
        result = await self.__options.merge_options(baml_options).call_function_async(
            function_name=function_name, args=args
        )
        return typing.cast(
            types.KnowledgeGraph,
            result.cast_to(types, types, stream_types, False, __runtime__),
        )

    async def ExtractContentGraph(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Extract a knowledge graph from *content* with the default client."""
        return await self.__invoke(
            "ExtractContentGraph",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    async def ExtractContentGraphGeneric(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Extract a knowledge graph using the generic extraction prompt."""
        return await self.__invoke(
            "ExtractContentGraphGeneric",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    async def ExtractContentGraphWithAnthropic(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Extract a knowledge graph using the Anthropic-backed function."""
        return await self.__invoke(
            "ExtractContentGraphWithAnthropic",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    async def ExtractContentGraphWithEnvPrompt(self, content: str,
        prompt_override: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Extract a knowledge graph with an environment-driven prompt."""
        return await self.__invoke(
            "ExtractContentGraphWithEnvPrompt",
            {"content": content, "prompt_override": prompt_override},
            baml_options,
        )
class BamlStreamClient:
    """Builds ``BamlStream`` objects for the generated functions.

    Each method starts a streaming call and returns a stream that yields
    partial ``stream_types.KnowledgeGraph`` values and resolves to a final
    ``types.KnowledgeGraph``.
    """

    __options: DoNotUseDirectlyCallManager

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options

    def __start(
        self,
        function_name: str,
        args: typing.Dict[str, typing.Any],
        baml_options: BamlCallOptions,
    ) -> baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]:
        # Kick off the async stream, then wrap it with converters for both the
        # partial (streaming) and final event payloads.
        ctx, result = self.__options.merge_options(baml_options).create_async_stream(
            function_name=function_name, args=args
        )
        return baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph](
            result,
            lambda x: typing.cast(stream_types.KnowledgeGraph, x.cast_to(types, types, stream_types, True, __runtime__)),
            lambda x: typing.cast(types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)),
            ctx,
        )

    def ExtractContentGraph(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]:
        """Streaming variant of ExtractContentGraph."""
        return self.__start(
            "ExtractContentGraph",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    def ExtractContentGraphGeneric(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]:
        """Streaming variant of ExtractContentGraphGeneric."""
        return self.__start(
            "ExtractContentGraphGeneric",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    def ExtractContentGraphWithAnthropic(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]:
        """Streaming variant of ExtractContentGraphWithAnthropic."""
        return self.__start(
            "ExtractContentGraphWithAnthropic",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    def ExtractContentGraphWithEnvPrompt(self, content: str,
        prompt_override: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.BamlStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]:
        """Streaming variant of ExtractContentGraphWithEnvPrompt."""
        return self.__start(
            "ExtractContentGraphWithEnvPrompt",
            {"content": content, "prompt_override": prompt_override},
            baml_options,
        )
class BamlHttpRequestClient:
    """Builds (but does not send) HTTP requests for the generated functions.

    Useful for inspecting or dispatching the request through a custom HTTP
    stack; each method returns a ``baml_py.baml_py.HTTPRequest``.
    """

    __options: DoNotUseDirectlyCallManager

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options

    async def __build(
        self,
        function_name: str,
        args: typing.Dict[str, typing.Any],
        baml_options: BamlCallOptions,
    ) -> baml_py.baml_py.HTTPRequest:
        # mode="request" selects the non-streaming request shape.
        return await self.__options.merge_options(baml_options).create_http_request_async(
            function_name=function_name, args=args, mode="request"
        )

    async def ExtractContentGraph(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        """HTTP request for ExtractContentGraph."""
        return await self.__build(
            "ExtractContentGraph",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    async def ExtractContentGraphGeneric(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        """HTTP request for ExtractContentGraphGeneric."""
        return await self.__build(
            "ExtractContentGraphGeneric",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    async def ExtractContentGraphWithAnthropic(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        """HTTP request for ExtractContentGraphWithAnthropic."""
        return await self.__build(
            "ExtractContentGraphWithAnthropic",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    async def ExtractContentGraphWithEnvPrompt(self, content: str,
        prompt_override: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        """HTTP request for ExtractContentGraphWithEnvPrompt."""
        return await self.__build(
            "ExtractContentGraphWithEnvPrompt",
            {"content": content, "prompt_override": prompt_override},
            baml_options,
        )
class BamlHttpStreamRequestClient:
    """Builds (but does not send) streaming HTTP requests for the functions.

    Identical to ``BamlHttpRequestClient`` except that the generated request
    asks the provider for a streaming response (``mode="stream"``).
    """

    __options: DoNotUseDirectlyCallManager

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options

    async def __build(
        self,
        function_name: str,
        args: typing.Dict[str, typing.Any],
        baml_options: BamlCallOptions,
    ) -> baml_py.baml_py.HTTPRequest:
        # mode="stream" selects the streaming request shape.
        return await self.__options.merge_options(baml_options).create_http_request_async(
            function_name=function_name, args=args, mode="stream"
        )

    async def ExtractContentGraph(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        """Streaming HTTP request for ExtractContentGraph."""
        return await self.__build(
            "ExtractContentGraph",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    async def ExtractContentGraphGeneric(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        """Streaming HTTP request for ExtractContentGraphGeneric."""
        return await self.__build(
            "ExtractContentGraphGeneric",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    async def ExtractContentGraphWithAnthropic(self, content: str,
        mode: typing.Optional[typing_extensions.Literal["simple", "base", "guided", "strict", "custom"]] = None,
        custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        """Streaming HTTP request for ExtractContentGraphWithAnthropic."""
        return await self.__build(
            "ExtractContentGraphWithAnthropic",
            {"content": content, "mode": mode, "custom_prompt_content": custom_prompt_content},
            baml_options,
        )

    async def ExtractContentGraphWithEnvPrompt(self, content: str,
        prompt_override: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        """Streaming HTTP request for ExtractContentGraphWithEnvPrompt."""
        return await self.__build(
            "ExtractContentGraphWithEnvPrompt",
            {"content": content, "prompt_override": prompt_override},
            baml_options,
        )
# Shared module-level async client; the public entry point of this module.
b = BamlAsyncClient(DoNotUseDirectlyCallManager({}))

View file

@ -0,0 +1,94 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
from __future__ import annotations
import os
import warnings
import typing_extensions
import typing
import functools
from baml_py.logging import (
get_log_level as baml_get_log_level,
set_log_level as baml_set_log_level,
)
from .globals import reset_baml_env_vars
rT = typing_extensions.TypeVar("rT") # return type
pT = typing_extensions.ParamSpec("pT") # parameters type
def _deprecated(message: str):
def decorator(func: typing.Callable[pT, rT]) -> typing.Callable[pT, rT]:
"""Use this decorator to mark functions as deprecated.
Every time the decorated function runs, it will emit
a "deprecation" warning."""
@functools.wraps(func)
def new_func(*args: pT.args, **kwargs: pT.kwargs):
warnings.simplefilter("always", DeprecationWarning) # turn off filter
warnings.warn(
"Call to a deprecated function {}.".format(func.__name__) + message,
category=DeprecationWarning,
stacklevel=2,
)
warnings.simplefilter("default", DeprecationWarning) # reset filter
return func(*args, **kwargs)
return new_func
return decorator
@_deprecated("Use os.environ['BAML_LOG'] instead")
def get_log_level():
"""
Get the log level for the BAML Python client.
"""
return baml_get_log_level()
@_deprecated("Use os.environ['BAML_LOG'] instead")
def set_log_level(
level: typing_extensions.Literal["DEBUG", "INFO", "WARN", "ERROR", "OFF"] | str,
):
"""
Set the log level for the BAML Python client
"""
baml_set_log_level(level)
os.environ["BAML_LOG"] = level
@_deprecated("Use os.environ['BAML_LOG_JSON_MODE'] instead")
def set_log_json_mode():
"""
Set the log JSON mode for the BAML Python client.
"""
os.environ["BAML_LOG_JSON_MODE"] = "true"
@_deprecated("Use os.environ['BAML_LOG_MAX_CHUNK_LENGTH'] instead")
def set_log_max_chunk_length():
"""
Set the maximum log chunk length for the BAML Python client.
"""
os.environ["BAML_LOG_MAX_CHUNK_LENGTH"] = "1000"
# Public surface of this deprecated logging-configuration shim module.
__all__ = [
    "set_log_level",
    "get_log_level",
    "set_log_json_mode",
    "reset_baml_env_vars",
    "set_log_max_chunk_length",
]

View file

@ -0,0 +1,35 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
from __future__ import annotations
import os
import warnings
from baml_py import BamlCtxManager, BamlRuntime
from .inlinedbaml import get_baml_files
from typing import Dict
# Module-level BAML runtime singleton, built once at import time from the
# inlined BAML sources.  The environment is snapshotted here; per-call
# overrides flow through BamlCallOptions instead.
DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME = BamlRuntime.from_files(
    "baml_src",
    get_baml_files(),
    os.environ.copy()
)
# Context manager that pairs the runtime with call-context bookkeeping.
DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX = BamlCtxManager(DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME)
def reset_baml_env_vars(env_vars: Dict[str, str]):
    """Deprecated no-op kept for backward compatibility.

    Environment variables are now read lazily on every function call, so
    there is nothing to reset; calling this only emits a warning.  The
    *env_vars* argument is intentionally ignored.
    """
    message = (
        "reset_baml_env_vars is deprecated and should be removed. "
        "Environment variables are now lazily loaded on each function call"
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)
# Nothing exported publicly; the runtime singletons above are internal.
__all__ = []

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,81 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
import typing
import typing_extensions
from . import stream_types, types
from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions
class LlmResponseParser:
    """Parses complete (non-streaming) LLM responses for each function.

    Every generated function in this client returns a
    ``types.KnowledgeGraph``, so all methods share one private parse helper.
    """

    __options: DoNotUseDirectlyCallManager

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options

    def __parse(
        self, function_name: str, llm_response: str, baml_options: BamlCallOptions
    ) -> types.KnowledgeGraph:
        # mode="request" applies the non-streaming output schema.
        parsed = self.__options.merge_options(baml_options).parse_response(
            function_name=function_name, llm_response=llm_response, mode="request"
        )
        return typing.cast(types.KnowledgeGraph, parsed)

    def ExtractContentGraph(
        self, llm_response: str, baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Parse a raw LLM response as the output of ExtractContentGraph."""
        return self.__parse("ExtractContentGraph", llm_response, baml_options)

    def ExtractContentGraphGeneric(
        self, llm_response: str, baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Parse a raw LLM response as the output of ExtractContentGraphGeneric."""
        return self.__parse("ExtractContentGraphGeneric", llm_response, baml_options)

    def ExtractContentGraphWithAnthropic(
        self, llm_response: str, baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Parse a raw LLM response as the output of ExtractContentGraphWithAnthropic."""
        return self.__parse("ExtractContentGraphWithAnthropic", llm_response, baml_options)

    def ExtractContentGraphWithEnvPrompt(
        self, llm_response: str, baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Parse a raw LLM response as the output of ExtractContentGraphWithEnvPrompt."""
        return self.__parse("ExtractContentGraphWithEnvPrompt", llm_response, baml_options)
class LlmStreamParser:
    """Parses partial (streaming) LLM responses for each function.

    Mirrors ``LlmResponseParser`` but applies the streaming output schema
    and returns ``stream_types.KnowledgeGraph`` partials.
    """

    __options: DoNotUseDirectlyCallManager

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options

    def __parse(
        self, function_name: str, llm_response: str, baml_options: BamlCallOptions
    ) -> stream_types.KnowledgeGraph:
        # mode="stream" applies the partial/streaming output schema.
        parsed = self.__options.merge_options(baml_options).parse_response(
            function_name=function_name, llm_response=llm_response, mode="stream"
        )
        return typing.cast(stream_types.KnowledgeGraph, parsed)

    def ExtractContentGraph(
        self, llm_response: str, baml_options: BamlCallOptions = {},
    ) -> stream_types.KnowledgeGraph:
        """Parse a partial LLM response for ExtractContentGraph."""
        return self.__parse("ExtractContentGraph", llm_response, baml_options)

    def ExtractContentGraphGeneric(
        self, llm_response: str, baml_options: BamlCallOptions = {},
    ) -> stream_types.KnowledgeGraph:
        """Parse a partial LLM response for ExtractContentGraphGeneric."""
        return self.__parse("ExtractContentGraphGeneric", llm_response, baml_options)

    def ExtractContentGraphWithAnthropic(
        self, llm_response: str, baml_options: BamlCallOptions = {},
    ) -> stream_types.KnowledgeGraph:
        """Parse a partial LLM response for ExtractContentGraphWithAnthropic."""
        return self.__parse("ExtractContentGraphWithAnthropic", llm_response, baml_options)

    def ExtractContentGraphWithEnvPrompt(
        self, llm_response: str, baml_options: BamlCallOptions = {},
    ) -> stream_types.KnowledgeGraph:
        """Parse a partial LLM response for ExtractContentGraphWithEnvPrompt."""
        return self.__parse("ExtractContentGraphWithEnvPrompt", llm_response, baml_options)

View file

@ -0,0 +1,256 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
import os
import typing
import typing_extensions
import baml_py
from . import types, stream_types, type_builder
from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__, DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX as __ctx__manager__
class BamlCallOptions(typing.TypedDict, total=False):
    # Per-call overrides accepted by every generated function; all keys are
    # optional (total=False) and are merged over the client's defaults.
    tb: typing_extensions.NotRequired[type_builder.TypeBuilder]  # dynamic type builder
    client_registry: typing_extensions.NotRequired[baml_py.baml_py.ClientRegistry]  # alternate LLM client config
    env: typing_extensions.NotRequired[typing.Dict[str, typing.Optional[str]]]  # env overrides; a None value removes the variable
    collector: typing_extensions.NotRequired[
        typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]
    ]  # one or many usage/trace collectors
class _ResolvedBamlOptions:
    """Plain holder for call options after normalization.

    Produced from a ``BamlCallOptions`` dict: the collector union is
    flattened to a list and env overrides are merged into a concrete mapping.
    No validation happens here.
    """

    tb: typing.Optional[baml_py.baml_py.TypeBuilder]
    client_registry: typing.Optional[baml_py.baml_py.ClientRegistry]
    collectors: typing.List[baml_py.baml_py.Collector]
    env_vars: typing.Dict[str, str]

    def __init__(
        self,
        tb: typing.Optional[baml_py.baml_py.TypeBuilder],
        client_registry: typing.Optional[baml_py.baml_py.ClientRegistry],
        collectors: typing.List[baml_py.baml_py.Collector],
        env_vars: typing.Dict[str, str],
    ):
        # Straight positional field assignment.
        self.tb, self.client_registry = tb, client_registry
        self.collectors, self.env_vars = collectors, env_vars
class DoNotUseDirectlyCallManager:
    """Resolves per-call BAML options and forwards calls to the global runtime.

    Wraps a ``BamlCallOptions`` dict and exposes the low-level entry points
    used by the generated clients: sync/async function calls, stream
    creation, raw HTTP request construction, and LLM-response parsing.
    ``merge_options`` returns a new manager rather than mutating this one.
    """

    def __init__(self, baml_options: BamlCallOptions):
        self.__baml_options = baml_options

    def __getstate__(self):
        # Return state needed for pickling
        return {"baml_options": self.__baml_options}

    def __setstate__(self, state):
        # Restore state from pickling
        self.__baml_options = state["baml_options"]

    def __resolve(self) -> _ResolvedBamlOptions:
        # Normalize the loosely-typed options dict into the concrete values
        # the runtime expects.
        tb = self.__baml_options.get("tb")
        if tb is not None:
            # Unwrap the user-facing TypeBuilder down to its native handle.
            baml_tb = tb._tb # type: ignore (we know how to use this private attribute)
        else:
            baml_tb = None
        client_registry = self.__baml_options.get("client_registry")
        collector = self.__baml_options.get("collector")
        # A lone collector is accepted as a convenience; the runtime always
        # receives a list.
        collectors_as_list = (
            collector
            if isinstance(collector, list)
            else [collector] if collector is not None else []
        )
        # Start from the process environment; a None value in the "env"
        # option removes that variable instead of setting it.
        env_vars = os.environ.copy()
        for k, v in self.__baml_options.get("env", {}).items():
            if v is not None:
                env_vars[k] = v
            else:
                env_vars.pop(k, None)
        return _ResolvedBamlOptions(
            baml_tb,
            client_registry,
            collectors_as_list,
            env_vars,
        )

    def merge_options(self, options: BamlCallOptions) -> "DoNotUseDirectlyCallManager":
        """Return a new manager with ``options`` layered over the current ones
        (per-call options win on key collisions)."""
        return DoNotUseDirectlyCallManager({**self.__baml_options, **options})

    async def call_function_async(
        self, *, function_name: str, args: typing.Dict[str, typing.Any]
    ) -> baml_py.baml_py.FunctionResult:
        """Invoke a BAML function asynchronously and return its raw result."""
        resolved_options = self.__resolve()
        return await __runtime__.call_function(
            function_name,
            args,
            # ctx
            __ctx__manager__.clone_context(),
            # tb
            resolved_options.tb,
            # cr
            resolved_options.client_registry,
            # collectors
            resolved_options.collectors,
            # env_vars
            resolved_options.env_vars,
        )

    def call_function_sync(
        self, *, function_name: str, args: typing.Dict[str, typing.Any]
    ) -> baml_py.baml_py.FunctionResult:
        """Invoke a BAML function synchronously and return its raw result."""
        resolved_options = self.__resolve()
        ctx = __ctx__manager__.get()
        return __runtime__.call_function_sync(
            function_name,
            args,
            # ctx
            ctx,
            # tb
            resolved_options.tb,
            # cr
            resolved_options.client_registry,
            # collectors
            resolved_options.collectors,
            # env_vars
            resolved_options.env_vars,
        )

    def create_async_stream(
        self,
        *,
        function_name: str,
        args: typing.Dict[str, typing.Any],
    ) -> typing.Tuple[baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.FunctionResultStream]:
        """Open an async stream for a BAML function; returns (ctx, stream)."""
        resolved_options = self.__resolve()
        ctx = __ctx__manager__.clone_context()
        result = __runtime__.stream_function(
            function_name,
            args,
            # this is always None, we set this later!
            # on_event
            None,
            # ctx
            ctx,
            # tb
            resolved_options.tb,
            # cr
            resolved_options.client_registry,
            # collectors
            resolved_options.collectors,
            # env_vars
            resolved_options.env_vars,
        )
        return ctx, result

    def create_sync_stream(
        self,
        *,
        function_name: str,
        args: typing.Dict[str, typing.Any],
    ) -> typing.Tuple[baml_py.baml_py.RuntimeContextManager, baml_py.baml_py.SyncFunctionResultStream]:
        """Open a synchronous stream for a BAML function; returns (ctx, stream)."""
        resolved_options = self.__resolve()
        ctx = __ctx__manager__.get()
        result = __runtime__.stream_function_sync(
            function_name,
            args,
            # this is always None, we set this later!
            # on_event
            None,
            # ctx
            ctx,
            # tb
            resolved_options.tb,
            # cr
            resolved_options.client_registry,
            # collectors
            resolved_options.collectors,
            # env_vars
            resolved_options.env_vars,
        )
        return ctx, result

    async def create_http_request_async(
        self,
        *,
        function_name: str,
        args: typing.Dict[str, typing.Any],
        mode: typing_extensions.Literal["stream", "request"],
    ) -> baml_py.baml_py.HTTPRequest:
        """Build (but do not send) the HTTP request for a BAML function call."""
        resolved_options = self.__resolve()
        return await __runtime__.build_request(
            function_name,
            args,
            # ctx
            __ctx__manager__.clone_context(),
            # tb
            resolved_options.tb,
            # cr
            resolved_options.client_registry,
            # env_vars
            resolved_options.env_vars,
            # is_stream
            mode == "stream",
        )

    def create_http_request_sync(
        self,
        *,
        function_name: str,
        args: typing.Dict[str, typing.Any],
        mode: typing_extensions.Literal["stream", "request"],
    ) -> baml_py.baml_py.HTTPRequest:
        """Synchronous counterpart of :meth:`create_http_request_async`."""
        resolved_options = self.__resolve()
        return __runtime__.build_request_sync(
            function_name,
            args,
            # ctx
            __ctx__manager__.get(),
            # tb
            resolved_options.tb,
            # cr
            resolved_options.client_registry,
            # env_vars
            resolved_options.env_vars,
            # is_stream
            mode == "stream",
        )

    def parse_response(self, *, function_name: str, llm_response: str, mode: typing_extensions.Literal["stream", "request"]) -> typing.Any:
        """Parse a raw LLM response string into the function's return value.

        ``mode == "stream"`` allows partial (stream_types) results.
        """
        resolved_options = self.__resolve()
        return __runtime__.parse_llm_response(
            function_name,
            llm_response,
            # enum_module
            types,
            # cls_module
            types,
            # partial_cls_module
            stream_types,
            # allow_partials
            mode == "stream",
            # ctx
            __ctx__manager__.get(),
            # tb
            resolved_options.tb,
            # cr
            resolved_options.client_registry,
            # env_vars
            resolved_options.env_vars,
        )

View file

@ -0,0 +1,50 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
import typing
import typing_extensions
from pydantic import BaseModel, ConfigDict
import baml_py
from . import types
# Type of the value carried by a StreamState update.
StreamStateValueT = typing.TypeVar('StreamStateValueT')
class StreamState(BaseModel, typing.Generic[StreamStateValueT]):
    """A (possibly partial) value paired with its streaming completion state."""
    value: StreamStateValueT
    state: typing_extensions.Literal["Pending", "Incomplete", "Complete"]
# #########################################################################
# Generated classes (3)
# #########################################################################
class Edge(BaseModel):
    """Streaming (partial) form of a graph edge: every field may still be unset."""
    # Fields are Optional because a stream event may arrive before the model
    # has emitted them.
    source_node_id: typing.Optional[str] = None
    target_node_id: typing.Optional[str] = None
    relationship_name: typing.Optional[str] = None
class KnowledgeGraph(BaseModel):
    """Streaming (partial) knowledge graph.

    NOTE(review): ``nodes`` references the completed ``types.Node`` while
    ``edges`` uses this module's partial ``Edge``; the partial ``Node``
    defined below is not used here. Confirm this asymmetry is intended.
    """
    nodes: typing.List["types.Node"]
    edges: typing.List["Edge"]
class Node(BaseModel):
    """Streaming (partial) form of a graph node."""
    # extra='allow' preserves any additional keys the model emits.
    model_config = ConfigDict(extra='allow')
    id: typing.Optional[str] = None
    name: typing.Optional[str] = None
    type: typing.Optional[str] = None
    description: typing.Optional[str] = None
# #########################################################################
# Generated type aliases (0)
# #########################################################################

View file

@ -0,0 +1,247 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
import typing
import typing_extensions
import baml_py
from . import stream_types, types, type_builder
from .parser import LlmResponseParser, LlmStreamParser
from .runtime import DoNotUseDirectlyCallManager, BamlCallOptions
from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME as __runtime__
class BamlSyncClient:
    """Synchronous entry point for every generated BAML function.

    Also exposes sub-clients for streaming (``stream``), raw HTTP request
    construction (``request`` / ``stream_request``), and response parsing
    (``parse`` / ``parse_stream``).
    """
    __options: DoNotUseDirectlyCallManager
    __stream_client: "BamlStreamClient"
    __http_request: "BamlHttpRequestClient"
    __http_stream_request: "BamlHttpStreamRequestClient"
    __llm_response_parser: LlmResponseParser
    __llm_stream_parser: LlmStreamParser

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options
        self.__stream_client = BamlStreamClient(options)
        self.__http_request = BamlHttpRequestClient(options)
        self.__http_stream_request = BamlHttpStreamRequestClient(options)
        self.__llm_response_parser = LlmResponseParser(options)
        self.__llm_stream_parser = LlmStreamParser(options)

    def __getstate__(self):
        # Return state needed for pickling
        return {"options": self.__options}

    def __setstate__(self, state):
        # Restore state from pickling; sub-clients are rebuilt, not pickled.
        self.__options = state["options"]
        self.__stream_client = BamlStreamClient(self.__options)
        self.__http_request = BamlHttpRequestClient(self.__options)
        self.__http_stream_request = BamlHttpStreamRequestClient(self.__options)
        self.__llm_response_parser = LlmResponseParser(self.__options)
        self.__llm_stream_parser = LlmStreamParser(self.__options)

    def with_options(self,
        tb: typing.Optional[type_builder.TypeBuilder] = None,
        client_registry: typing.Optional[baml_py.baml_py.ClientRegistry] = None,
        collector: typing.Optional[typing.Union[baml_py.baml_py.Collector, typing.List[baml_py.baml_py.Collector]]] = None,
        env: typing.Optional[typing.Dict[str, typing.Optional[str]]] = None,
    ) -> "BamlSyncClient":
        """Return a NEW client with the given options layered over this one's."""
        options: BamlCallOptions = {}
        if tb is not None:
            options["tb"] = tb
        if client_registry is not None:
            options["client_registry"] = client_registry
        if collector is not None:
            options["collector"] = collector
        if env is not None:
            options["env"] = env
        return BamlSyncClient(self.__options.merge_options(options))

    @property
    def stream(self):
        # Streaming variants of each generated function.
        return self.__stream_client

    @property
    def request(self):
        # Builds non-streaming HTTP requests without sending them.
        return self.__http_request

    @property
    def stream_request(self):
        # Builds streaming HTTP requests without sending them.
        return self.__http_stream_request

    @property
    def parse(self):
        # Parses complete LLM responses into the final types.
        return self.__llm_response_parser

    @property
    def parse_stream(self):
        # Parses partial LLM responses into the stream types.
        return self.__llm_stream_parser

    def ExtractContentGraph(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},  # read-only default; merge_options copies
    ) -> types.KnowledgeGraph:
        """Extract a knowledge graph from ``content`` (default client)."""
        result = self.__options.merge_options(baml_options).call_function_sync(function_name="ExtractContentGraph", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        })
        return typing.cast(types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__))

    def ExtractContentGraphGeneric(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Extract a knowledge graph using the generic client configuration."""
        result = self.__options.merge_options(baml_options).call_function_sync(function_name="ExtractContentGraphGeneric", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        })
        return typing.cast(types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__))

    def ExtractContentGraphWithAnthropic(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Extract a knowledge graph via the Anthropic-configured function."""
        result = self.__options.merge_options(baml_options).call_function_sync(function_name="ExtractContentGraphWithAnthropic", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        })
        return typing.cast(types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__))

    def ExtractContentGraphWithEnvPrompt(self, content: str, prompt_override: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> types.KnowledgeGraph:
        """Extract a knowledge graph with an env-supplied prompt (optional override)."""
        result = self.__options.merge_options(baml_options).call_function_sync(function_name="ExtractContentGraphWithEnvPrompt", args={
            "content": content, "prompt_override": prompt_override,
        })
        return typing.cast(types.KnowledgeGraph, result.cast_to(types, types, stream_types, False, __runtime__))
class BamlStreamClient:
    """Synchronous streaming variants of the generated BAML functions.

    Each method returns a ``BamlSyncStream`` yielding partial
    ``stream_types`` values and finishing with the completed ``types`` value.
    """
    __options: DoNotUseDirectlyCallManager

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options

    def ExtractContentGraph(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},  # read-only default; merge_options copies
    ) -> baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]:
        ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="ExtractContentGraph", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        })
        return baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph](
            result,
            # partial events cast to the stream (Optional-field) types
            lambda x: typing.cast(stream_types.KnowledgeGraph, x.cast_to(types, types, stream_types, True, __runtime__)),
            # the final payload cast to the completed types
            lambda x: typing.cast(types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)),
            ctx,
        )

    def ExtractContentGraphGeneric(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]:
        ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="ExtractContentGraphGeneric", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        })
        return baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph](
            result,
            lambda x: typing.cast(stream_types.KnowledgeGraph, x.cast_to(types, types, stream_types, True, __runtime__)),
            lambda x: typing.cast(types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)),
            ctx,
        )

    def ExtractContentGraphWithAnthropic(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]:
        ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="ExtractContentGraphWithAnthropic", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        })
        return baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph](
            result,
            lambda x: typing.cast(stream_types.KnowledgeGraph, x.cast_to(types, types, stream_types, True, __runtime__)),
            lambda x: typing.cast(types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)),
            ctx,
        )

    def ExtractContentGraphWithEnvPrompt(self, content: str, prompt_override: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph]:
        ctx, result = self.__options.merge_options(baml_options).create_sync_stream(function_name="ExtractContentGraphWithEnvPrompt", args={
            "content": content, "prompt_override": prompt_override,
        })
        return baml_py.BamlSyncStream[stream_types.KnowledgeGraph, types.KnowledgeGraph](
            result,
            lambda x: typing.cast(stream_types.KnowledgeGraph, x.cast_to(types, types, stream_types, True, __runtime__)),
            lambda x: typing.cast(types.KnowledgeGraph, x.cast_to(types, types, stream_types, False, __runtime__)),
            ctx,
        )
class BamlHttpRequestClient:
    """Builds the non-streaming HTTP request for each generated function
    without sending it (``mode="request"``)."""
    __options: DoNotUseDirectlyCallManager

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options

    def ExtractContentGraph(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},  # read-only default; merge_options copies
    ) -> baml_py.baml_py.HTTPRequest:
        result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraph", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        }, mode="request")
        return result

    def ExtractContentGraphGeneric(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraphGeneric", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        }, mode="request")
        return result

    def ExtractContentGraphWithAnthropic(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraphWithAnthropic", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        }, mode="request")
        return result

    def ExtractContentGraphWithEnvPrompt(self, content: str, prompt_override: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraphWithEnvPrompt", args={
            "content": content, "prompt_override": prompt_override,
        }, mode="request")
        return result
class BamlHttpStreamRequestClient:
    """Builds the streaming HTTP request for each generated function
    without sending it (``mode="stream"``)."""
    __options: DoNotUseDirectlyCallManager

    def __init__(self, options: DoNotUseDirectlyCallManager):
        self.__options = options

    def ExtractContentGraph(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},  # read-only default; merge_options copies
    ) -> baml_py.baml_py.HTTPRequest:
        result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraph", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        }, mode="stream")
        return result

    def ExtractContentGraphGeneric(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraphGeneric", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        }, mode="stream")
        return result

    def ExtractContentGraphWithAnthropic(self, content: str, mode: typing.Optional[typing.Union[typing_extensions.Literal['simple'], typing_extensions.Literal['base'], typing_extensions.Literal['guided'], typing_extensions.Literal['strict'], typing_extensions.Literal['custom']]] = None, custom_prompt_content: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraphWithAnthropic", args={
            "content": content, "mode": mode, "custom_prompt_content": custom_prompt_content,
        }, mode="stream")
        return result

    def ExtractContentGraphWithEnvPrompt(self, content: str, prompt_override: typing.Optional[str] = None,
        baml_options: BamlCallOptions = {},
    ) -> baml_py.baml_py.HTTPRequest:
        result = self.__options.merge_options(baml_options).create_http_request_sync(function_name="ExtractContentGraphWithEnvPrompt", args={
            "content": content, "prompt_override": prompt_override,
        }, mode="stream")
        return result
# Module-level default client instance, built with no per-call options.
b = BamlSyncClient(DoNotUseDirectlyCallManager({}))

View file

@ -0,0 +1,22 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX
# Public tracing helpers re-exported from the global BAML context.
trace = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.trace_fn
set_tags = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.upsert_tags
def flush():
    """Flush the global BAML tracing context (delegates to its ``flush``)."""
    DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.flush()
on_log_event = DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_CTX.on_log_event
__all__ = ['trace', 'set_tags', "flush", "on_log_event"]

View file

@ -0,0 +1,208 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
import typing
from baml_py import type_builder
from baml_py import baml_py
from .globals import DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME
class TypeBuilder(type_builder.TypeBuilder):
    """Schema-specific type builder preloaded with this project's classes."""

    def __init__(self):
        super().__init__(classes=set(
            ["Edge","KnowledgeGraph","Node",]
        ), enums=set(
            []
        ), runtime=DO_NOT_USE_DIRECTLY_UNLESS_YOU_KNOW_WHAT_YOURE_DOING_RUNTIME)

    # #########################################################################
    # Generated enums 0
    # #########################################################################
    # #########################################################################
    # Generated classes 3
    # #########################################################################
    @property
    def Edge(self) -> "EdgeViewer":
        # Read-only view over the statically declared Edge class.
        return EdgeViewer(self)

    @property
    def KnowledgeGraph(self) -> "KnowledgeGraphViewer":
        # Read-only view over the statically declared KnowledgeGraph class.
        return KnowledgeGraphViewer(self)

    @property
    def Node(self) -> "NodeBuilder":
        # Node gets a full builder: NodeBuilder.add_property allows dynamic fields.
        return NodeBuilder(self)
# #########################################################################
# Generated enums 0
# #########################################################################
# #########################################################################
# Generated classes 3
# #########################################################################
class EdgeAst:
    """Handle on the runtime class definition for ``Edge``."""

    def __init__(self, tb: type_builder.TypeBuilder):
        _tb = tb._tb # type: ignore (we know how to use this private attribute)
        self._bldr = _tb.class_("Edge")
        # Property names declared statically in the BAML schema.
        self._properties: typing.Set[str] = set([ "source_node_id", "target_node_id", "relationship_name", ])
        self._props = EdgeProperties(self._bldr, self._properties)

    def type(self) -> baml_py.FieldType:
        # Field type for referencing Edge when building other dynamic types.
        return self._bldr.field()

    @property
    def props(self) -> "EdgeProperties":
        return self._props
class EdgeViewer(EdgeAst):
    """Read-only accessor over the statically declared ``Edge`` properties."""

    def __init__(self, tb: type_builder.TypeBuilder):
        super().__init__(tb)

    def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]:
        """Return ``(name, viewer)`` pairs for every declared property."""
        pairs: typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]] = []
        for prop_name in self._properties:
            viewer = type_builder.ClassPropertyViewer(self._bldr.property(prop_name))
            pairs.append((prop_name, viewer))
        return pairs
class EdgeProperties:
    """Named accessors for the declared properties of ``Edge``."""

    def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]):
        self.__bldr = bldr
        self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821

    @property
    def source_node_id(self) -> type_builder.ClassPropertyViewer:
        return type_builder.ClassPropertyViewer(self.__bldr.property("source_node_id"))

    @property
    def target_node_id(self) -> type_builder.ClassPropertyViewer:
        return type_builder.ClassPropertyViewer(self.__bldr.property("target_node_id"))

    @property
    def relationship_name(self) -> type_builder.ClassPropertyViewer:
        return type_builder.ClassPropertyViewer(self.__bldr.property("relationship_name"))
class KnowledgeGraphAst:
    """Handle on the runtime class definition for ``KnowledgeGraph``."""

    def __init__(self, tb: type_builder.TypeBuilder):
        _tb = tb._tb # type: ignore (we know how to use this private attribute)
        self._bldr = _tb.class_("KnowledgeGraph")
        # Property names declared statically in the BAML schema.
        self._properties: typing.Set[str] = set([ "nodes", "edges", ])
        self._props = KnowledgeGraphProperties(self._bldr, self._properties)

    def type(self) -> baml_py.FieldType:
        # Field type for referencing KnowledgeGraph in other dynamic types.
        return self._bldr.field()

    @property
    def props(self) -> "KnowledgeGraphProperties":
        return self._props
class KnowledgeGraphViewer(KnowledgeGraphAst):
    """Read-only accessor over the declared ``KnowledgeGraph`` properties."""

    def __init__(self, tb: type_builder.TypeBuilder):
        super().__init__(tb)

    def list_properties(self) -> typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]]:
        """Return ``(name, viewer)`` pairs for every declared property."""
        pairs: typing.List[typing.Tuple[str, type_builder.ClassPropertyViewer]] = []
        for prop_name in self._properties:
            viewer = type_builder.ClassPropertyViewer(self._bldr.property(prop_name))
            pairs.append((prop_name, viewer))
        return pairs
class KnowledgeGraphProperties:
    """Named accessors for the declared properties of ``KnowledgeGraph``."""

    def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]):
        self.__bldr = bldr
        self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821

    @property
    def nodes(self) -> type_builder.ClassPropertyViewer:
        return type_builder.ClassPropertyViewer(self.__bldr.property("nodes"))

    @property
    def edges(self) -> type_builder.ClassPropertyViewer:
        return type_builder.ClassPropertyViewer(self.__bldr.property("edges"))
class NodeAst:
    """Handle on the runtime class definition for ``Node``."""

    def __init__(self, tb: type_builder.TypeBuilder):
        _tb = tb._tb # type: ignore (we know how to use this private attribute)
        self._bldr = _tb.class_("Node")
        # Property names declared statically in the BAML schema.
        self._properties: typing.Set[str] = set([ "id", "name", "type", "description", ])
        self._props = NodeProperties(self._bldr, self._properties)

    def type(self) -> baml_py.FieldType:
        # Field type for referencing Node when building other dynamic types.
        return self._bldr.field()

    @property
    def props(self) -> "NodeProperties":
        return self._props
class NodeBuilder(NodeAst):
    """Mutable builder for ``Node``, which supports dynamic extra properties."""

    def __init__(self, tb: type_builder.TypeBuilder):
        super().__init__(tb)

    def add_property(self, name: str, type: baml_py.FieldType) -> baml_py.ClassPropertyBuilder:
        """Declare a new dynamic property; statically declared names are rejected."""
        already_declared = name in self._properties
        if already_declared:
            raise ValueError(f"Property {name} already exists.")
        prop_builder = self._bldr.property(name)
        return prop_builder.type(type)

    def list_properties(self) -> typing.List[typing.Tuple[str, baml_py.ClassPropertyBuilder]]:
        """Return ``(name, builder)`` pairs for the statically declared properties."""
        declared: typing.List[typing.Tuple[str, baml_py.ClassPropertyBuilder]] = []
        for prop_name in self._properties:
            declared.append((prop_name, self._bldr.property(prop_name)))
        return declared
class NodeProperties:
    """Accessors for ``Node`` properties, including dynamic name lookup."""

    def __init__(self, bldr: baml_py.ClassBuilder, properties: typing.Set[str]):
        self.__bldr = bldr
        self.__properties = properties # type: ignore (we know how to use this private attribute) # noqa: F821

    def __getattr__(self, name: str) -> baml_py.ClassPropertyBuilder:
        # Only reached when normal attribute lookup fails; restricted to the
        # statically declared property names.
        if name not in self.__properties:
            raise AttributeError(f"Property {name} not found.")
        return self.__bldr.property(name)

    @property
    def id(self) -> baml_py.ClassPropertyBuilder:
        return self.__bldr.property("id")

    @property
    def name(self) -> baml_py.ClassPropertyBuilder:
        return self.__bldr.property("name")

    @property
    def type(self) -> baml_py.ClassPropertyBuilder:
        return self.__bldr.property("type")

    @property
    def description(self) -> baml_py.ClassPropertyBuilder:
        return self.__bldr.property("description")

View file

@ -0,0 +1,29 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
from . import types
from . import stream_types
# Maps qualified generated type names to their Python classes, covering both
# the final ("types") and streaming-partial ("stream_types") variants.
type_map = {
    "types.Edge": types.Edge,
    "stream_types.Edge": stream_types.Edge,
    "types.KnowledgeGraph": types.KnowledgeGraph,
    "stream_types.KnowledgeGraph": stream_types.KnowledgeGraph,
    "types.Node": types.Node,
    "stream_types.Node": stream_types.Node,
}

View file

@ -0,0 +1,68 @@
# ----------------------------------------------------------------------------
#
# Welcome to Baml! To use this generated code, please run the following:
#
# $ pip install baml
#
# ----------------------------------------------------------------------------
# This file was generated by BAML: please do not edit it. Instead, edit the
# BAML files and re-generate this code using: baml-cli generate
# baml-cli is available with the baml package.
import typing
import typing_extensions
from enum import Enum
from pydantic import BaseModel, ConfigDict
import baml_py
# CheckT: the type of the value being checked; CheckName: literal check names.
CheckT = typing_extensions.TypeVar('CheckT')
CheckName = typing_extensions.TypeVar('CheckName', bound=str)
class Check(BaseModel):
    """Result of a single named check: its name, expression, and status."""
    name: str
    expression: str
    status: str
class Checked(BaseModel, typing.Generic[CheckT, CheckName]):
    """A value together with the results of the checks applied to it."""
    value: CheckT
    checks: typing.Dict[CheckName, Check]
def get_checks(checks: typing.Dict[CheckName, Check]) -> typing.List[Check]:
    """Return the check results as a list, discarding the name keys."""
    return [*checks.values()]

def all_succeeded(checks: typing.Dict[CheckName, Check]) -> bool:
    """Return True iff every check in the mapping has status "succeeded"."""
    for single_check in checks.values():
        if single_check.status != "succeeded":
            return False
    return True
# #########################################################################
# Generated enums (0)
# #########################################################################
# #########################################################################
# Generated classes (3)
# #########################################################################
class Edge(BaseModel):
    """A directed, named relationship between two nodes of the knowledge graph."""
    source_node_id: str
    target_node_id: str
    relationship_name: str
class KnowledgeGraph(BaseModel):
    """Final (fully materialized) knowledge graph of nodes and edges."""
    nodes: typing.List["Node"]
    edges: typing.List["Edge"]
class Node(BaseModel):
    """A graph node."""
    # extra='allow' preserves any additional keys the model emits.
    model_config = ConfigDict(extra='allow')
    id: str
    name: str
    type: str
    description: str
# #########################################################################
# Generated type aliases (0)
# #########################################################################

View file

@ -0,0 +1,179 @@
import os
from typing import Optional
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import model_validator
from baml_py import ClientRegistry
class LLMConfig(BaseSettings):
"""
Configuration settings for the LLM (Large Language Model) provider and related options.
Public instance variables include:
- llm_provider
- llm_model
- llm_endpoint
- llm_api_key
- llm_api_version
- llm_temperature
- llm_streaming
- llm_max_tokens
- transcription_model
- graph_prompt_path
- llm_rate_limit_enabled
- llm_rate_limit_requests
- llm_rate_limit_interval
- embedding_rate_limit_enabled
- embedding_rate_limit_requests
- embedding_rate_limit_interval
Public methods include:
- ensure_env_vars_for_ollama
- to_dict
"""
llm_provider: str = "openai"
llm_model: str = "gpt-4o-mini"
llm_endpoint: str = ""
llm_api_key: Optional[str] = None
llm_api_version: Optional[str] = None
llm_temperature: float = 0.0
llm_streaming: bool = False
llm_max_tokens: int = 16384
transcription_model: str = "whisper-1"
graph_prompt_path: str = "generate_graph_prompt.txt"
llm_rate_limit_enabled: bool = False
llm_rate_limit_requests: int = 60
llm_rate_limit_interval: int = 60 # in seconds (default is 60 requests per minute)
embedding_rate_limit_enabled: bool = False
embedding_rate_limit_requests: int = 60
embedding_rate_limit_interval: int = 60 # in seconds (default is 60 requests per minute)
baml_registry = ClientRegistry()
model_config = SettingsConfigDict(env_file=".env", extra="allow")
baml_registry.add_llm_client(name=llm_provider, provider=llm_provider, options={
"model": llm_model,
"temperature": llm_temperature,
"api_key": llm_api_key
})
# Sets MyAmazingClient as the primary client
baml_registry.set_primary('openai')
@model_validator(mode="after")
def ensure_env_vars_for_ollama(self) -> "LLMConfig":
    """
    Validate the environment setup required by the 'ollama' LLM provider.

    When ``llm_provider`` is "ollama", the LLM environment variables and the
    embedding environment variables must each be set all-or-nothing; a partial
    group raises ``ValueError``. Any other provider is passed through untouched.

    Returns:
    --------
    - 'LLMConfig': This instance, unchanged, after validation.
    """
    if self.llm_provider != "ollama":
        # Only the "ollama" provider requires these env-var groups.
        return self

    def _has_value(name: str) -> bool:
        """Return True when environment variable *name* is set and non-blank."""
        value = os.environ.get(name)
        return bool(value and value.strip())

    #
    # 1. LLM environment variables: all set, or none set.
    #
    llm_status = {
        name: _has_value(name)
        for name in ("LLM_MODEL", "LLM_ENDPOINT", "LLM_API_KEY")
    }
    if any(llm_status.values()) and not all(llm_status.values()):
        missing_llm = [name for name, present in llm_status.items() if not present]
        raise ValueError(
            "You have set some but not all of the required environment variables "
            f"for LLM usage (LLM_MODEL, LLM_ENDPOINT, LLM_API_KEY). Missing: {missing_llm}"
        )

    #
    # 2. Embedding environment variables: all set, or none set.
    #
    embedding_status = {
        name: _has_value(name)
        for name in (
            "EMBEDDING_PROVIDER",
            "EMBEDDING_MODEL",
            "EMBEDDING_DIMENSIONS",
            "HUGGINGFACE_TOKENIZER",
        )
    }
    if any(embedding_status.values()) and not all(embedding_status.values()):
        missing_embed = [name for name, present in embedding_status.items() if not present]
        raise ValueError(
            "You have set some but not all of the required environment variables "
            "for embeddings (EMBEDDING_PROVIDER, EMBEDDING_MODEL, "
            "EMBEDDING_DIMENSIONS, HUGGINGFACE_TOKENIZER). Missing: "
            f"{missing_embed}"
        )

    return self
def to_dict(self) -> dict:
    """
    Serialize this configuration into a plain dictionary.

    Returns:
    --------
    - dict: The configuration values keyed by their public setting names
      (the ``llm_`` prefix is dropped from the LLM-specific keys).
    """
    # Output key -> attribute name; dict order defines the output order.
    key_to_attr = {
        "provider": "llm_provider",
        "model": "llm_model",
        "endpoint": "llm_endpoint",
        "api_key": "llm_api_key",
        "api_version": "llm_api_version",
        "temperature": "llm_temperature",
        "streaming": "llm_streaming",
        "max_tokens": "llm_max_tokens",
        "transcription_model": "transcription_model",
        "graph_prompt_path": "graph_prompt_path",
        "rate_limit_enabled": "llm_rate_limit_enabled",
        "rate_limit_requests": "llm_rate_limit_requests",
        "rate_limit_interval": "llm_rate_limit_interval",
        "embedding_rate_limit_enabled": "embedding_rate_limit_enabled",
        "embedding_rate_limit_requests": "embedding_rate_limit_requests",
        "embedding_rate_limit_interval": "embedding_rate_limit_interval",
    }
    return {key: getattr(self, attr) for key, attr in key_to_attr.items()}
@lru_cache
def get_llm_config():
    """
    Return the process-wide LLM configuration singleton.

    ``lru_cache`` memoizes this zero-argument call, so the ``LLMConfig``
    instance is constructed once and the same object is returned to every
    subsequent caller.

    Returns:
    --------
    - LLMConfig: The cached configuration instance.
    """
    config = LLMConfig()
    return config

View file

@ -0,0 +1,367 @@
/// One extracted entity or concept (analogous to a Wikipedia article).
class Node {
  /// Human-readable identifier taken from the source text (never numeric).
  id string
  name string
  /// Basic entity type, e.g. "Person", "Organization", "Date".
  type string
  description string
  /// Allows extra properties to be attached at runtime (e.g. via a TypeBuilder).
  @@dynamic
}
/// A directed, typed relationship between two nodes
/// (analogous to a Wikipedia link).
class Edge {
  /// `id` of the node the relationship starts from.
  source_node_id string
  /// `id` of the node the relationship points to.
  target_node_id string
  /// snake_case relationship label, e.g. `born_in`.
  relationship_name string
}
/// Full extraction result: all entities plus the edges between them.
class KnowledgeGraph {
  // @stream.done presumably emits each node only once fully formed when
  // streaming — TODO confirm against BAML streaming docs.
  nodes (Node @stream.done)[]
  edges Edge[]
}
// Simple template for basic extraction (fast, good quality).
// Default system prompt used by the Extract* functions below when mode is
// "simple" or unset.
template_string ExtractContentGraphPrompt() #"
You are an advanced algorithm that extracts structured data into a knowledge graph.
- **Nodes**: Entities/concepts (like Wikipedia articles).
- **Edges**: Relationships (like Wikipedia links). Use snake_case (e.g., `acted_in`).
**Rules:**
1. **Node Labeling & IDs**
- Use basic types only (e.g., "Person", "Date", "Organization").
- Avoid overly specific or generic terms (e.g., no "Mathematician" or "Entity").
- Node IDs must be human-readable names from the text (no numbers).
2. **Dates & Numbers**
- Label dates as **"Date"** in "YYYY-MM-DD" format (use available parts if incomplete).
- Properties are key-value pairs; do not use escaped quotes.
3. **Coreference Resolution**
- Use a single, complete identifier for each entity (e.g., always "John Doe" not "Joe" or "he").
4. **Relationship Labels**:
- Use descriptive, lowercase, snake_case names for edges.
- *Example*: born_in, married_to, invented_by.
- Avoid vague or generic labels like isA, relatesTo, has.
- Avoid duplicated relationships like produces, produced by.
5. **Strict Compliance**
- Follow these rules exactly. Non-compliance results in termination.
"#
// Detailed template for complex extraction (slower, higher quality).
// Selected by the Extract* functions when mode == "base".
template_string DetailedExtractContentGraphPrompt() #"
You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
**Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
**Edges** represent relationships between concepts. They're akin to Wikipedia links.
The aim is to achieve simplicity and clarity in the knowledge graph.
# 1. Labeling Nodes
**Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person, always label it as **"Person"**.
- Avoid using more specific terms like "Mathematician" or "Scientist", keep those as "profession" property.
- Don't use too generic terms like "Entity".
**Node IDs**: Never utilize integers as node IDs.
- Node IDs should be names or human-readable identifiers found in the text.
# 2. Handling Numerical Data and Dates
- For example, when you identify an entity representing a date, make sure it has type **"Date"**.
- Extract the date in the format "YYYY-MM-DD"
- If not possible to extract the whole date, extract month or year, or both if available.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use snake_case for relationship names, e.g., `acted_in`.
# 3. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the Person's ID.
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
# 4. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination.
"#
// Guided template with step-by-step instructions.
// Selected by the Extract* functions when mode == "guided".
template_string GuidedExtractContentGraphPrompt() #"
You are an advanced algorithm designed to extract structured information to build a clean, consistent, and human-readable knowledge graph.
**Objective**:
- Nodes represent entities and concepts, similar to Wikipedia articles.
- Edges represent typed relationships between nodes, similar to Wikipedia hyperlinks.
- The graph must be clear, minimal, consistent, and semantically precise.
**Node Guidelines**:
1. **Label Consistency**:
- Use consistent, basic types for all node labels.
- Do not switch between granular or vague labels for the same kind of entity.
- Pick one label for each category and apply it uniformly.
- Each entity type should be in a singular form and in a case of multiple words separated by whitespaces
2. **Node Identifiers**:
- Node IDs must be human-readable and derived directly from the text.
- Prefer full names and canonical terms.
- Never use integers or autogenerated IDs.
- *Example*: Use "Marie Curie", "Theory of Evolution", "Google".
3. **Coreference Resolution**:
- Maintain one consistent node ID for each real-world entity.
- Resolve aliases, acronyms, and pronouns to the most complete form.
- *Example*: Always use "John Doe" even if later referred to as "Doe" or "he".
**Edge Guidelines**:
4. **Relationship Labels**:
- Use descriptive, lowercase, snake_case names for edges.
- *Example*: born_in, married_to, invented_by.
- Avoid vague or generic labels like isA, relatesTo, has.
5. **Relationship Direction**:
- Edges must be directional and logically consistent.
- *Example*:
- "Marie Curie" —[born_in]→ "Warsaw"
- "Radioactivity" —[discovered_by]→ "Marie Curie"
**Compliance**:
Strict adherence to these guidelines is required. Any deviation will result in immediate termination of the task.
"#
// Strict template with zero-tolerance rules.
// Selected by the Extract* functions when mode == "strict".
template_string StrictExtractContentGraphPrompt() #"
You are a top-tier algorithm for **extracting structured information** from unstructured text to build a **knowledge graph**.
Your primary goal is to extract:
- **Nodes**: Representing **entities** and **concepts** (like Wikipedia nodes).
- **Edges**: Representing **relationships** between those concepts (like Wikipedia links).
The resulting knowledge graph must be **simple, consistent, and human-readable**.
## 1. Node Labeling and Identification
### Node Types
Use **basic atomic types** for node labels. Always prefer general types over specific roles or professions:
- "Person" for any human.
- "Organization" for companies, institutions, etc.
- "Location" for geographic or place entities.
- "Date" for any temporal expression.
- "Event" for historical or scheduled occurrences.
- "Work" for books, films, artworks, or research papers.
- "Concept" for abstract notions or ideas.
### Node IDs
- Always assign **human-readable and unambiguous identifiers**.
- Never use numeric or autogenerated IDs.
- Prioritize **most complete form** of entity names for consistency.
## 2. Relationship Handling
- Use **snake_case** for all relationship (edge) types.
- Keep relationship types semantically clear and consistent.
- Avoid vague relation names like "related_to" unless no better alternative exists.
## 3. Strict Compliance
Follow all rules exactly. Any deviation may lead to rejection or incorrect graph construction.
"#
// OpenAI client with environment model selection.
// Requires the LLM_MODEL and OPENAI_API_KEY environment variables to be set.
client<llm> OpenAIClientWithEnvModel {
  provider openai
  options {
    model env.LLM_MODEL
    api_key env.OPENAI_API_KEY
  }
}
// Anthropic client with environment model selection.
// Requires the LLM_MODEL and ANTHROPIC_API_KEY environment variables to be set.
client<llm> AnthropicClientWithEnvModel {
  provider anthropic
  options {
    model env.LLM_MODEL
    api_key env.ANTHROPIC_API_KEY
  }
}
// Default client (maintains backward compatibility).
// Uses a fixed model instead of env.LLM_MODEL; only OPENAI_API_KEY is required.
client<llm> DefaultClient {
  provider openai
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}
// Function that returns raw structured output (for custom objects - to be handled in Python).
// NOTE(review): the body is currently identical to ExtractContentGraph below;
// the two are kept separate so this variant can diverge for custom types.
function ExtractContentGraphGeneric(
  content: string,
  mode: "simple" | "base" | "guided" | "strict" | "custom"?,
  custom_prompt_content: string?
) -> KnowledgeGraph {
  client OpenAIClientWithEnvModel
  // mode selects one of the template_strings above; "custom" injects
  // custom_prompt_content verbatim; anything else falls back to "simple".
  prompt #"
{% if mode == "base" %}
{{ DetailedExtractContentGraphPrompt() }}
{% elif mode == "guided" %}
{{ GuidedExtractContentGraphPrompt() }}
{% elif mode == "strict" %}
{{ StrictExtractContentGraphPrompt() }}
{% elif mode == "custom" and custom_prompt_content %}
{{ custom_prompt_content }}
{% else %}
{{ ExtractContentGraphPrompt() }}
{% endif %}
{{ ctx.output_format(prefix="Answer in this schema:\n") }}
Before answering, briefly describe what you'll extract from the text, then provide the structured output.
Example format:
I'll extract the main entities and their relationships from this text...
{ ... }
{{ _.role('user') }}
{{ content }}
"#
}
// Backward-compatible function specifically for KnowledgeGraph.
// This is the function called by the Python wrapper (b.ExtractContentGraph).
function ExtractContentGraph(
  content: string,
  mode: "simple" | "base" | "guided" | "strict" | "custom"?,
  custom_prompt_content: string?
) -> KnowledgeGraph {
  client OpenAIClientWithEnvModel
  // mode selects one of the template_strings above; "custom" injects
  // custom_prompt_content verbatim; anything else falls back to "simple".
  prompt #"
{% if mode == "base" %}
{{ DetailedExtractContentGraphPrompt() }}
{% elif mode == "guided" %}
{{ GuidedExtractContentGraphPrompt() }}
{% elif mode == "strict" %}
{{ StrictExtractContentGraphPrompt() }}
{% elif mode == "custom" and custom_prompt_content %}
{{ custom_prompt_content }}
{% else %}
{{ ExtractContentGraphPrompt() }}
{% endif %}
{{ ctx.output_format(prefix="Answer in this schema:\n") }}
Before answering, briefly describe what you'll extract from the text, then provide the structured output.
Example format:
I'll extract the main entities and their relationships from this text...
{ ... }
{{ _.role('user') }}
{{ content }}
"#
}
// Alternative function that uses a caller-supplied prompt override instead of
// the mode-based template selection; falls back to the simple template.
function ExtractContentGraphWithEnvPrompt(
  content: string,
  prompt_override: string?
) -> KnowledgeGraph {
  client OpenAIClientWithEnvModel
  prompt #"
{% if prompt_override %}
{{ prompt_override }}
{% else %}
{{ ExtractContentGraphPrompt() }}
{% endif %}
{{ ctx.output_format(prefix="Answer in this schema:\n") }}
Before answering, briefly describe what you'll extract from the text, then provide the structured output.
Example format:
I'll extract the main entities and their relationships from this text...
{ ... }
{{ _.role('user') }}
{{ content }}
"#
}
// Function that uses the Anthropic client; otherwise identical to
// ExtractContentGraph (same mode-based template selection).
function ExtractContentGraphWithAnthropic(
  content: string,
  mode: "simple" | "base" | "guided" | "strict" | "custom"?,
  custom_prompt_content: string?
) -> KnowledgeGraph {
  client AnthropicClientWithEnvModel
  prompt #"
{% if mode == "base" %}
{{ DetailedExtractContentGraphPrompt() }}
{% elif mode == "guided" %}
{{ GuidedExtractContentGraphPrompt() }}
{% elif mode == "strict" %}
{{ StrictExtractContentGraphPrompt() }}
{% elif mode == "custom" and custom_prompt_content %}
{{ custom_prompt_content }}
{% else %}
{{ ExtractContentGraphPrompt() }}
{% endif %}
{{ ctx.output_format(prefix="Answer in this schema:\n") }}
Before answering, briefly describe what you'll extract from the text, then provide the structured output.
Example format:
I'll extract the main entities and their relationships from this text...
{ ... }
{{ _.role('user') }}
{{ content }}
"#
}
// Smoke test: simple-mode extraction over a short biographical sentence.
test ExtractPersonExample {
  functions [ExtractContentGraph]
  args {
    content #"
      My name is Vasiliy. I was born in 1992. I am a software engineer. I work at Google and am based in Berlin.
    "#
    mode "simple"
  }
}
// Smoke test: guided-mode extraction over a company/people paragraph.
test ExtractGuidedExample {
  functions [ExtractContentGraph]
  args {
    content #"
      Apple Inc. was founded by Steve Jobs in 1976. The company is headquartered in Cupertino, California.
      Tim Cook is the current CEO of Apple Inc.
    "#
    mode "guided"
  }
}
// Smoke test: strict-mode extraction over a single factual sentence.
test ExtractStrictExample {
  functions [ExtractContentGraph]
  args {
    content #"
      The Python programming language was created by Guido van Rossum in 1991.
    "#
    mode "strict"
  }
}
// Smoke test: the Generic variant with simple mode.
test ExtractGenericExample {
  functions [ExtractContentGraphGeneric]
  args {
    content #"
      React is a JavaScript library for building user interfaces, developed by Facebook.
    "#
    mode "simple"
  }
}

View file

@ -0,0 +1,18 @@
import os
from typing import Type
from pydantic import BaseModel
from cognee.infrastructure.llm.structured_output_framework.baml.async_client import b
from cognee.infrastructure.llm.structured_output_framework.baml.type_builder import TypeBuilder
from cognee.infrastructure.llm.structured_output_framework.baml_src.config import get_llm_config
async def extract_content_graph(content: str, response_model: Type[BaseModel]):
    """
    Extract a knowledge graph from *content* via the BAML ExtractContentGraph function.

    Parameters:
    -----------
    - content (str): Raw text to extract entities and relationships from.
    - response_model (Type[BaseModel]): Kept for interface compatibility with the
      instructor-based extractor; currently unused — the BAML function always
      returns its own KnowledgeGraph type.

    Returns:
    --------
    - The KnowledgeGraph produced by the BAML function (simple mode).
    """
    config = get_llm_config()

    # Fix: the ClientRegistry was previously passed under the "tb" key, which
    # BAML reserves for a TypeBuilder. A ClientRegistry must be passed as
    # "client_registry" for the configured client selection to take effect.
    graph = await b.ExtractContentGraph(
        content,
        mode="simple",
        baml_options={"client_registry": config.baml_registry},
    )
    return graph

View file

@ -0,0 +1,18 @@
// This helps use auto generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
  // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
  output_type "python/pydantic"
  // Where the generated code will be saved (relative to baml_src/)
  output_dir "../baml/"
  // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
  // The BAML VSCode extension version should also match this version.
  version "0.201.0"
  // Valid values: "sync", "async"
  // This controls what `b.FunctionName()` will be (sync or async).
  // NOTE(review): set to sync, yet the Python wrapper imports baml.async_client
  // and awaits the call — confirm the generator also emits the async client.
  default_client_mode sync
}

View file

@ -0,0 +1 @@
from .knowledge_graph.extract_content_graph import extract_content_graph

View file

@ -1,7 +1,7 @@
from typing import Type, List
from typing import Type
from pydantic import BaseModel
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.get_llm_client import get_llm_client
async def extract_categories(content: str, response_model: Type[BaseModel]):

View file

@ -5,8 +5,8 @@ from typing import Type
from instructor.exceptions import InstructorRetryException
from pydantic import BaseModel
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.prompts import read_query_prompt
from cognee.shared.data_models import SummarizedCode
from cognee.tasks.summarization.mock_summary import get_mock_summarized_code

View file

@ -1,9 +1,9 @@
import os
from typing import Type
from pydantic import BaseModel
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.prompts import render_prompt
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.get_llm_client import get_llm_client
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.prompts import render_prompt
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.config import get_llm_config
async def extract_content_graph(content: str, response_model: Type[BaseModel]):

View file

@ -3,9 +3,9 @@ from pydantic import BaseModel
import instructor
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
class AnthropicAdapter(LLMInterface):

View file

@ -6,7 +6,7 @@ import time
import asyncio
import random
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.config import get_llm_config
logger = get_logger()

View file

@ -1,5 +1,4 @@
import litellm
import logging
from pydantic import BaseModel
from typing import Type, Optional
from litellm import acompletion, JSONSchemaValidationError
@ -7,9 +6,9 @@ from litellm import acompletion, JSONSchemaValidationError
from cognee.shared.logging_utils import get_logger
from cognee.modules.observability.get_observe import get_observe
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.rate_limiter import (
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.rate_limiter import (
rate_limit_async,
sleep_and_retry_async,
)

View file

@ -1,14 +1,11 @@
"""Adapter for Generic API LLM provider API"""
import logging
from typing import Type
from pydantic import BaseModel
import instructor
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
import litellm

View file

@ -4,7 +4,7 @@ from enum import Enum
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm import get_llm_config
from cognee.infrastructure.llm.ollama.adapter import OllamaAPIAdapter
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.ollama.adapter import OllamaAPIAdapter
# Define an Enum for LLM Providers
@ -48,7 +48,7 @@ def get_llm_client():
# Check if max_token value is defined in liteLLM for given model
# if not use value from cognee configuration
from cognee.infrastructure.llm.utils import (
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.utils import (
get_model_max_tokens,
) # imported here to avoid circular imports
@ -59,7 +59,7 @@ def get_llm_client():
if llm_config.llm_api_key is None:
raise InvalidValueError(message="LLM API key is not set.")
from .openai.adapter import OpenAIAdapter
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.openai.adapter import OpenAIAdapter
return OpenAIAdapter(
api_key=llm_config.llm_api_key,
@ -75,7 +75,7 @@ def get_llm_client():
if llm_config.llm_api_key is None:
raise InvalidValueError(message="LLM API key is not set.")
from .generic_llm_api.adapter import GenericAPIAdapter
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.generic_llm_api import GenericAPIAdapter
return OllamaAPIAdapter(
llm_config.llm_endpoint,
@ -86,7 +86,7 @@ def get_llm_client():
)
elif provider == LLMProvider.ANTHROPIC:
from .anthropic.adapter import AnthropicAdapter
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.anthropic import AnthropicAdapter
return AnthropicAdapter(max_tokens=max_tokens, model=llm_config.llm_model)
@ -94,7 +94,7 @@ def get_llm_client():
if llm_config.llm_api_key is None:
raise InvalidValueError(message="LLM API key is not set.")
from .generic_llm_api.adapter import GenericAPIAdapter
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.generic_llm_api import GenericAPIAdapter
return GenericAPIAdapter(
llm_config.llm_endpoint,
@ -108,7 +108,7 @@ def get_llm_client():
if llm_config.llm_api_key is None:
raise InvalidValueError(message="LLM API key is not set.")
from .gemini.adapter import GeminiAdapter
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.gemini import GeminiAdapter
return GeminiAdapter(
api_key=llm_config.llm_api_key,

View file

@ -3,7 +3,7 @@
from typing import Type, Protocol
from abc import abstractmethod
from pydantic import BaseModel
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.prompts import read_query_prompt
class LLMInterface(Protocol):

View file

@ -1,9 +1,8 @@
from typing import Type
from pydantic import BaseModel
import instructor
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.llm.rate_limiter import (
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.rate_limiter import (
rate_limit_async,
rate_limit_sync,
sleep_and_retry_async,

View file

@ -7,9 +7,9 @@ from pydantic import BaseModel
from cognee.modules.data.processing.document_types.open_data_file import open_data_file
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.rate_limiter import (
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.rate_limiter import (
rate_limit_async,
rate_limit_sync,
sleep_and_retry_async,

View file

@ -49,12 +49,7 @@ from functools import wraps
from limits import RateLimitItemPerMinute, storage
from limits.strategies import MovingWindowRateLimiter
from cognee.shared.logging_utils import get_logger
from cognee.infrastructure.llm.config import get_llm_config
import threading
import logging
import functools
import openai
import os
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.config import get_llm_config
logger = get_logger()

View file

@ -1,4 +1,4 @@
from typing import List, Any, Union
from typing import List, Any
from ..tokenizer_interface import TokenizerInterface
@ -24,7 +24,7 @@ class GeminiTokenizer(TokenizerInterface):
# Get LLM API key from config
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
from cognee.infrastructure.llm.config import get_llm_config
from cognee.infrastructure.llm.structured_output_framework.llitellm_instructor.llm.config import get_llm_config
config = get_embedding_config()
llm_config = get_llm_config()

Some files were not shown because too many files have changed in this diff Show more