microcore

Minimalistic Foundation for AI Applications

microcore is a collection of Python adapters for Large Language Models and Semantic Search APIs that lets you communicate with these services in a convenient way, makes them easily switchable, and separates business logic from implementation details.

  1"""
  2# Minimalistic Foundation for AI Applications
  3
  4**microcore** is a collection of python adapters for Large Language Models
  5and Semantic Search APIs allowing to
  6communicate with these services convenient way, make it easily switchable
  7and separate business logic from implementation details.
  8"""
  9
 10import os
 11import microcore.ui  # noqa
 12from .embedding_db import SearchResult, AbstractEmbeddingDB, SearchResults
 13from .file_storage import storage
 14from ._env import configure, env, config
 15from .logging import use_logging
 16from .message_types import UserMsg, AssistantMsg, SysMsg, Msg, PartialMsg
 17from .configuration import ApiType, LLMConfigError, Config
 18from .types import BadAIJsonAnswer, BadAIAnswer
 19from .wrappers.prompt_wrapper import PromptWrapper
 20from .wrappers.llm_response_wrapper import LLMResponse
 21from ._llm_functions import llm, allm, llm_parallel
 22from .utils import parse, dedent
 23from .metrics import Metrics
 24
 25
 26def tpl(file: os.PathLike[str] | str, **kwargs) -> str | PromptWrapper:
 27    """Renders a prompt template using the provided parameters."""
 28    return PromptWrapper(env().tpl_function(file, **kwargs), kwargs)
 29
 30
 31def prompt(template_str: str, remove_indent=True, **kwargs) -> str | PromptWrapper:
 32    """Renders a prompt template from string using the provided parameters."""
 33    if remove_indent:
 34        template_str = dedent(template_str)
 35    return PromptWrapper(
 36        env().jinja_env.from_string(template_str).render(**kwargs), kwargs
 37    )
 38
 39
 40fmt = prompt
 41
 42
 43def use_model(name: str):
 44    """Switches language model"""
 45    config().MODEL = name
 46    config().LLM_DEFAULT_ARGS["model"] = name
 47
 48
 49def validate_config():
 50    """
 51    Validates current MicroCore configuration
 52
 53    Raises:
 54        `LLMConfigError` if configuration is invalid
 55    """
 56    config().validate()
 57
 58
 59class _EmbeddingDBProxy(AbstractEmbeddingDB):
 60    def get_all(self, collection: str) -> list[str | SearchResult]:
 61        return env().texts.get_all(collection)
 62
 63    def search(
 64        self,
 65        collection: str,
 66        query: str | list,
 67        n_results: int = 5,
 68        where: dict = None,
 69        **kwargs,
 70    ) -> SearchResults | list[str | SearchResult]:
 71        return env().texts.search(collection, query, n_results, where, **kwargs)
 72
 73    def find(self, *args, **kwargs) -> SearchResults | list[str | SearchResult]:
 74        return self.search(*args, **kwargs)
 75
 76    def find_all(
 77        self,
 78        collection: str,
 79        query: str | list,
 80        where: dict = None,
 81        **kwargs,
 82    ) -> SearchResults | list[str | SearchResult]:
 83        return env().texts.find_all(collection, query, where, **kwargs)
 84
 85    def save_many(self, collection: str, items: list[tuple[str, dict] | str]):
 86        return env().texts.save_many(collection, items)
 87
 88    def save(self, collection: str, text: str, metadata: dict = None):
 89        return env().texts.save(collection, text, metadata)
 90
 91    def clear(self, collection: str):
 92        return env().texts.clear(collection)
 93
 94    def count(self, collection: str) -> int:
 95        return env().texts.count(collection)
 96
 97    def delete(self, collection: str, what: str | list[str] | dict):
 98        return env().texts.delete(collection, what)
 99
100
101texts = _EmbeddingDBProxy()
102"""Embedding database, see `microcore.embedding_db.AbstractEmbeddingDB`"""
103
104__all__ = [
105    "llm",
106    "allm",
107    "llm_parallel",
108    "tpl",
109    "prompt",
110    "fmt",
111    "texts",
112    "configure",
113    "validate_config",
114    "storage",
115    "use_model",
116    "use_logging",
117    "env",
118    "config",
119    "Msg",
120    "UserMsg",
121    "SysMsg",
122    "AssistantMsg",
123    "PartialMsg",
124    "ApiType",
125    "BadAIJsonAnswer",
126    "BadAIAnswer",
127    "LLMConfigError",
128    "LLMResponse",
129    "PromptWrapper",
130    "parse",
131    "SearchResult",
132    "SearchResults",
133    "dedent",
134    # submodules
135    "embedding_db",
136    "file_storage",
137    "message_types",
138    "utils",
139    "configuration",
140    "Config",
141    "types",
142    "ui",
143    "Metrics",
144    # "wrappers",
145]
146
147__version__ = "3.10.0"
def llm( prompt: str | Msg | list[str] | list[Msg], **kwargs) -> str | LLMResponse:
10def llm(prompt: str | Msg | list[str] | list[Msg], **kwargs) -> str | LLMResponse:
11    """
12    Request Large Language Model synchronously
13
14    Args:
15        prompt (str | list[str]): Text to send to LLM
16        **kwargs (dict): Parameters supported by the LLM API
17
18            See parameters supported by the OpenAI:
19
20            - https://platform.openai.com/docs/api-reference/completions/create
21            - https://platform.openai.com/docs/api-reference/chat/create
22
23            **Additional parameters:**
24
25                - callback: callable - callback function
26                to be called on each chunk of text,
27                enables response streaming if supported by the LLM API
28                - callbacks: list[callable] - collection of callbacks
29                to be called on each chunk of text,
30                enables response streaming if supported by the LLM API
31
32    Returns:
33
34        Text generated by the LLM as string
35        with all fields returned by API accessible as an attributes.
36
37        See fields returned by the OpenAI:
38
39        - https://platform.openai.com/docs/api-reference/completions/object
40        - https://platform.openai.com/docs/api-reference/chat/object
41    """
42    [h(prompt, **kwargs) for h in env().llm_before_handlers]
43    start = datetime.now()
44    response = env().llm_function(prompt, **kwargs)
45    try:
46        response.gen_duration = (datetime.now() - start).total_seconds()
47    except AttributeError:
48        ...
49    [h(response) for h in env().llm_after_handlers]
50    return response

Request Large Language Model synchronously

Arguments:
  • prompt (str | list[str]): Text to send to LLM
  • **kwargs (dict): Parameters supported by the LLM API

    See parameters supported by the OpenAI:

    Additional parameters:

    • callback: callable - callback function to be called on each chunk of text, enables response streaming if supported by the LLM API
    • callbacks: list[callable] - collection of callbacks to be called on each chunk of text, enables response streaming if supported by the LLM API

Returns:

Text generated by the LLM as a string, with all fields returned by the API accessible as attributes.

See fields returned by the OpenAI:

async def allm( prompt: str | Msg | list[str] | list[Msg], **kwargs) -> str | LLMResponse:
53async def allm(
54    prompt: str | Msg | list[str] | list[Msg], **kwargs
55) -> str | LLMResponse:
56    """
57    Request Large Language Model asynchronously
58
59    Args:
60        prompt (str | list[str]): Text to send to LLM
61        **kwargs (dict): Parameters supported by the LLM API
62
63            See parameters supported by the OpenAI:
64
65            - https://platform.openai.com/docs/api-reference/completions/create
66            - https://platform.openai.com/docs/api-reference/chat/create
67
68            **Additional parameters:**
69
70            - callback: callable - callback function
71            to be called on each chunk of text,
72            enables response streaming if supported by the LLM API
73            - callbacks: list[callable] - collection of callbacks
74            to be called on each chunk of text,
75            enables response streaming if supported by the LLM API
76
77            Note: async callbacks are supported only for async LLM API calls
78
79    Returns:
80
81        Text generated by the LLM as string
82        with all fields returned by API accessible as an attributes.
83
84        See fields returned by the OpenAI:
85
86        - https://platform.openai.com/docs/api-reference/completions/object
87        - https://platform.openai.com/docs/api-reference/chat/object
88    """
89    [h(prompt, **kwargs) for h in env().llm_before_handlers]
90    start = datetime.now()
91    response = await env().llm_async_function(prompt, **kwargs)
92    try:
93        response.gen_duration = (datetime.now() - start).total_seconds()
94    except AttributeError:
95        ...
96    [h(response) for h in env().llm_after_handlers]
97    return response

Request Large Language Model asynchronously

Arguments:
  • prompt (str | list[str]): Text to send to LLM
  • **kwargs (dict): Parameters supported by the LLM API

    See parameters supported by the OpenAI:

    Additional parameters:

    • callback: callable - callback function to be called on each chunk of text, enables response streaming if supported by the LLM API
    • callbacks: list[callable] - collection of callbacks to be called on each chunk of text, enables response streaming if supported by the LLM API

    Note: async callbacks are supported only for async LLM API calls

Returns:

Text generated by the LLM as a string, with all fields returned by the API accessible as attributes.

See fields returned by the OpenAI:

async def llm_parallel( prompts: list, max_concurrent_tasks: int = None, **kwargs) -> list[str] | list[LLMResponse]:
100async def llm_parallel(
101    prompts: list, max_concurrent_tasks: int = None, **kwargs
102) -> list[str] | list[LLMResponse]:
103    tasks = [allm(prompt, **kwargs) for prompt in prompts]
104
105    if max_concurrent_tasks is None:
106        max_concurrent_tasks = int(env().config.MAX_CONCURRENT_TASKS)
107    if not max_concurrent_tasks:
108        max_concurrent_tasks = len(tasks)
109
110    return await run_parallel(tasks, max_concurrent_tasks=max_concurrent_tasks)
def tpl( file: os.PathLike[str] | str, **kwargs) -> str | PromptWrapper:
27def tpl(file: os.PathLike[str] | str, **kwargs) -> str | PromptWrapper:
28    """Renders a prompt template using the provided parameters."""
29    return PromptWrapper(env().tpl_function(file, **kwargs), kwargs)

Renders a prompt template using the provided parameters.

def prompt( template_str: str, remove_indent=True, **kwargs) -> str | PromptWrapper:
32def prompt(template_str: str, remove_indent=True, **kwargs) -> str | PromptWrapper:
33    """Renders a prompt template from string using the provided parameters."""
34    if remove_indent:
35        template_str = dedent(template_str)
36    return PromptWrapper(
37        env().jinja_env.from_string(template_str).render(**kwargs), kwargs
38    )

Renders a prompt template from string using the provided parameters.

def fmt( template_str: str, remove_indent=True, **kwargs) -> str | PromptWrapper:
32def prompt(template_str: str, remove_indent=True, **kwargs) -> str | PromptWrapper:
33    """Renders a prompt template from string using the provided parameters."""
34    if remove_indent:
35        template_str = dedent(template_str)
36    return PromptWrapper(
37        env().jinja_env.from_string(template_str).render(**kwargs), kwargs
38    )

Renders a prompt template from string using the provided parameters.

texts = _EmbeddingDBProxy()
def configure(cfg: Config | dict | str = None, **kwargs):
154    def _config_builder_wrapper(cfg: Config | dict | str = None, **kwargs):
155        """
156        - Convert configuration keys to uppercase
157        - Add LLM_ prefix to keys if necessary
158        - Allow to configure from Config instance or dictionary
159        """
160        if cfg:
161            assert not kwargs, "Cannot pass both cfg and kwargs"
162        if isinstance(cfg, dict):
163            return _config_builder_wrapper(**cfg)
164        if isinstance(cfg, str):
165            if not os.path.isfile(cfg):
166                raise LLMConfigError(f"Configuration file not found: {cfg}")
167            return _config_builder_wrapper(Config(USE_DOT_ENV=True, DOT_ENV_FILE=cfg))
168        kwargs = {str(k).upper(): v for k, v in kwargs.items()}
169        for k in list(kwargs.keys()):
170            if not hasattr(Config, k) and (
171                hasattr(Config, key := f"LLM_{k}") or key in _fields
172            ):
173                kwargs[key] = kwargs.pop(k)
174        return _Configure(**(cfg and asdict(cfg) or kwargs))
  • Convert configuration keys to uppercase
  • Add LLM_ prefix to keys if necessary
  • Allow to configure from Config instance or dictionary
def validate_config():
50def validate_config():
51    """
52    Validates current MicroCore configuration
53
54    Raises:
55        `LLMConfigError` if configuration is invalid
56    """
57    config().validate()

Validates current MicroCore configuration

Raises:
  • LLMConfigError if configuration is invalid
storage = <microcore.file_storage.Storage object>
def use_model(name: str):
44def use_model(name: str):
45    """Switches language model"""
46    config().MODEL = name
47    config().LLM_DEFAULT_ARGS["model"] = name

Switches language model

def use_logging():
65def use_logging():
66    """Turns on logging of LLM requests and responses to console."""
67    if not is_notebook():
68        init(autoreset=True)
69    if _log_request not in env().llm_before_handlers:
70        env().llm_before_handlers.append(_log_request)
71    if _log_response not in env().llm_after_handlers:
72        env().llm_after_handlers.append(_log_response)

Turns on logging of LLM requests and responses to console.

def env() -> microcore._env.Env:
181def env() -> Env:
182    """Returns the current MicroCore environment"""
183    return _env or Env()

Returns the current MicroCore environment

def config() -> Config:
186def config() -> Config:
187    """Resolve current configuration"""
188    return env().config

Resolve current configuration

@dataclass
class Msg:
16@dataclass
17class Msg:
18    dict_factory = dict
19    role: str = field(default=DEFAULT_MESSAGE_ROLE)
20    content: str = field(default="")
21
22    def __str__(self):
23        return str(self.content)
24
25    def strip(self):
26        self.content = self.content.strip()
27        return self
Msg(role: str = 'user', content: str = '')
dict_factory = <class 'dict'>
role: str = 'user'
content: str = ''
def strip(self):
25    def strip(self):
26        self.content = self.content.strip()
27        return self
@dataclass
class UserMsg(microcore.Msg):
35@dataclass
36class UserMsg(Msg):
37    role: str = field(default=Role.USER, init=False)
UserMsg(content: str = '')
role: str = 'user'
Inherited Members
Msg
dict_factory
content
strip
@dataclass
class SysMsg(microcore.Msg):
30@dataclass
31class SysMsg(Msg):
32    role: str = field(default=Role.SYSTEM, init=False)
SysMsg(content: str = '')
role: str = 'system'
Inherited Members
Msg
dict_factory
content
strip
@dataclass
class AssistantMsg(microcore.Msg):
40@dataclass
41class AssistantMsg(Msg):
42    role: str = field(default=Role.ASSISTANT, init=False)
AssistantMsg(content: str = '')
role: str = 'assistant'
Inherited Members
Msg
dict_factory
content
strip
class PartialMsg(microcore.AssistantMsg):
45class PartialMsg(AssistantMsg):
46    """A message that is not fully formed yet."""
47
48    class _PartialMsgDict(dict):
49        is_partial = True
50        """Custom dictionary class to handle additional properties"""
51
52    dict_factory = _PartialMsgDict
53    placeholder = "<|placeholder|>"
54    variants_splitter = "<|or|>"
55
56    @staticmethod
57    def split_prefix_and_suffixes(content: str):
58        parts = content.split(PartialMsg.placeholder)
59        prefix = parts[0]
60        suffix = parts[1] if len(parts) > 1 else ""
61        suffixes = suffix.split(PartialMsg.variants_splitter) if suffix else []
62        return prefix, suffixes
63
64    def prefix_and_suffixes(self):
65        return self.split_prefix_and_suffixes(self.content)
66
67    def prefix(self):
68        prefix, _ = self.prefix_and_suffixes()
69        return prefix
70
71    def suffixes(self):
72        _, suffixes = self.prefix_and_suffixes()
73        return suffixes

A message that is not fully formed yet.

dict_factory = <class 'microcore.message_types.PartialMsg._PartialMsgDict'>
placeholder = '<|placeholder|>'
variants_splitter = '<|or|>'
@staticmethod
def split_prefix_and_suffixes(content: str):
56    @staticmethod
57    def split_prefix_and_suffixes(content: str):
58        parts = content.split(PartialMsg.placeholder)
59        prefix = parts[0]
60        suffix = parts[1] if len(parts) > 1 else ""
61        suffixes = suffix.split(PartialMsg.variants_splitter) if suffix else []
62        return prefix, suffixes
def prefix_and_suffixes(self):
64    def prefix_and_suffixes(self):
65        return self.split_prefix_and_suffixes(self.content)
def prefix(self):
67    def prefix(self):
68        prefix, _ = self.prefix_and_suffixes()
69        return prefix
def suffixes(self):
71    def suffixes(self):
72        _, suffixes = self.prefix_and_suffixes()
73        return suffixes
class ApiType:
60class ApiType:
61    """LLM API types"""
62
63    OPEN_AI = "open_ai"
64    AZURE = "azure"
65    """See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models"""
66    ANYSCALE = "anyscale"
67    """See https://www.anyscale.com/endpoints"""
68    DEEP_INFRA = "deep_infra"
69    """List of text generation models: https://deepinfra.com/models?type=text-generation"""
70    ANTHROPIC = "anthropic"
71    GOOGLE_VERTEX_AI = "google_vertex_ai"
72    GOOGLE_AI_STUDIO = "google_ai_studio"
73
74    # Local models
75    FUNCTION = "function"
76    TRANSFORMERS = "transformers"
77    NONE = "none"
78
79    @staticmethod
80    def is_local(api_type: str) -> bool:
81        return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)

LLM API types

OPEN_AI = 'open_ai'
ANYSCALE = 'anyscale'
DEEP_INFRA = 'deep_infra'

List of text generation models: https://deepinfra.com/models?type=text-generation

ANTHROPIC = 'anthropic'
GOOGLE_VERTEX_AI = 'google_vertex_ai'
GOOGLE_AI_STUDIO = 'google_ai_studio'
FUNCTION = 'function'
TRANSFORMERS = 'transformers'
NONE = 'none'
@staticmethod
def is_local(api_type: str) -> bool:
79    @staticmethod
80    def is_local(api_type: str) -> bool:
81        return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
class BadAIJsonAnswer(microcore.BadAIAnswer):
25class BadAIJsonAnswer(BadAIAnswer):
26    def __init__(
27        self, message: str = "Invalid JSON generated by the LLM", details=None
28    ):
29        super().__init__(message, details)

Unprocessable response generated by the LLM

BadAIJsonAnswer(message: str = 'Invalid JSON generated by the LLM', details=None)
26    def __init__(
27        self, message: str = "Invalid JSON generated by the LLM", details=None
28    ):
29        super().__init__(message, details)
Inherited Members
BadAIAnswer
message
details
builtins.BaseException
with_traceback
add_note
args
class BadAIAnswer(builtins.ValueError):
13class BadAIAnswer(ValueError):
14    """Unprocessable response generated by the LLM"""
15
16    def __init__(self, message: str = None, details: str = None):
17        self.message = str(message or "Unprocessable response generated by the LLM")
18        self.details = details
19        super().__init__(self.message + (f": {self.details}" if self.details else ""))
20
21    def __str__(self):
22        return self.message + (f": {self.details}" if self.details else "")

Unprocessable response generated by the LLM

BadAIAnswer(message: str = None, details: str = None)
16    def __init__(self, message: str = None, details: str = None):
17        self.message = str(message or "Unprocessable response generated by the LLM")
18        self.details = details
19        super().__init__(self.message + (f": {self.details}" if self.details else ""))
message
details
Inherited Members
builtins.BaseException
with_traceback
add_note
args
class LLMConfigError(builtins.ValueError):
338class LLMConfigError(ValueError):
339    """LLM configuration error"""

LLM configuration error

Inherited Members
builtins.ValueError
ValueError
builtins.BaseException
with_traceback
add_note
args
10class LLMResponse(ExtendedString, ConvertableToMessage):
11    """
12    Response from the Large Language Model.
13
14    If treated as a string, it returns the text generated by the LLM.
15
16    Also, it contains all fields returned by the API accessible as an attributes.
17
18    See fields returned by the OpenAI:
19
20    - https://platform.openai.com/docs/api-reference/completions/object
21    - https://platform.openai.com/docs/api-reference/chat/object
22    """
23
24    def __new__(cls, string: str, attrs: dict = None):
25        attrs = {
26            **(attrs or {}),
27            "role": Role.ASSISTANT,
28            "content": str(string),
29            # generation duration in seconds (float), used in metrics
30            "gen_duration": None,
31        }
32        obj = ExtendedString.__new__(cls, string, attrs)
33        return obj
34
35    def parse_json(
36        self, raise_errors: bool = True, required_fields: list[str] = None
37    ) -> list | dict | float | int | str:
38        return parse_json(self.content, raise_errors, required_fields)
39
40    def parse_number(
41        self,
42        default=BadAIAnswer,
43        position="last",
44        dtype: type | str = float,
45        rounding: bool = False,
46    ) -> int | float | Any:
47        return extract_number(self.content, default, position, dtype, rounding)
48
49    def as_message(self) -> AssistantMsg:
50        return self.as_assistant

Response from the Large Language Model.

If treated as a string, it returns the text generated by the LLM.

Also, it contains all fields returned by the API, accessible as attributes.

See fields returned by the OpenAI:

def parse_json( self, raise_errors: bool = True, required_fields: list[str] = None) -> list | dict | float | int | str:
35    def parse_json(
36        self, raise_errors: bool = True, required_fields: list[str] = None
37    ) -> list | dict | float | int | str:
38        return parse_json(self.content, raise_errors, required_fields)
def parse_number( self, default=<class 'BadAIAnswer'>, position='last', dtype: type | str = <class 'float'>, rounding: bool = False) -> int | float | typing.Any:
40    def parse_number(
41        self,
42        default=BadAIAnswer,
43        position="last",
44        dtype: type | str = float,
45        rounding: bool = False,
46    ) -> int | float | Any:
47        return extract_number(self.content, default, position, dtype, rounding)
def as_message(self) -> AssistantMsg:
49    def as_message(self) -> AssistantMsg:
50        return self.as_assistant
Inherited Members
microcore.utils.ExtendedString
to_tokens
num_tokens
microcore.utils.ConvertableToMessage
as_user
as_system
as_assistant
as_model
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
count
expandtabs
find
partition
index
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans
 6class PromptWrapper(ExtendedString, ConvertableToMessage):
 7    def to_llm(self, **kwargs):
 8        """
 9        Send prompt to Large Language Model, see `llm`
10        """
11        return llm(self, **kwargs)
12
13    async def to_allm(self, **kwargs):
14        """
15        Send prompt to Large Language Model asynchronously, see `allm`
16        """
17        return await allm(self, **kwargs)

Provides a way of extending string with attributes and methods

PromptWrapper(string: str, attrs: dict = None)
55    def __new__(cls, string: str, attrs: dict = None):
56        """
57        Allows string to have attributes.
58        """
59        obj = str.__new__(cls, string)
60        if attrs:
61            for k, v in attrs.items():
62                setattr(obj, k, v)
63        return obj

Allows string to have attributes.

def to_llm(self, **kwargs):
 7    def to_llm(self, **kwargs):
 8        """
 9        Send prompt to Large Language Model, see `llm`
10        """
11        return llm(self, **kwargs)

Send prompt to Large Language Model, see llm

async def to_allm(self, **kwargs):
13    async def to_allm(self, **kwargs):
14        """
15        Send prompt to Large Language Model asynchronously, see `allm`
16        """
17        return await allm(self, **kwargs)

Send prompt to Large Language Model asynchronously, see allm

Inherited Members
microcore.utils.ExtendedString
to_tokens
num_tokens
microcore.utils.ConvertableToMessage
as_user
as_system
as_assistant
as_model
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
count
expandtabs
find
partition
index
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans
def parse( text: str, field_format: str = '\\[\\[(.*?)\\]\\]', required_fields: list = None) -> dict:
116def parse(
117    text: str, field_format: str = r"\[\[(.*?)\]\]", required_fields: list = None
118) -> dict:
119    """
120    Parse a document divided into sections and convert it into a dictionary.
121    """
122    pattern = rf"{field_format}\n(.*?)(?=\n{field_format}|$)"
123    matches = re.findall(pattern, text, re.DOTALL)
124    result = {key.strip().lower(): value for key, value, _ in matches}
125    if required_fields:
126        for field in required_fields:
127            if field not in result:
128                raise BadAIAnswer(f"Field '{field}' is required but not found")
129    return result

Parse a document divided into sections and convert it into a dictionary.

class SearchResult(microcore.utils.ExtendedString):
32class SearchResult(ExtendedString):
33    """
34    String containing the search result with additional information in attributes
35
36    Attributes:
37        id (str): document (text) identifier in embedding database
38        distance (float): The distance between the query and the search result
39        metadata (dict): A dictionary containing document metadata
40    """
41
42    id: str
43    distance: float
44    metadata: dict

String containing the search result with additional information in attributes

Attributes:
  • id (str): document (text) identifier in embedding database
  • distance (float): The distance between the query and the search result
  • metadata (dict): A dictionary containing document metadata
SearchResult(string: str, attrs: dict = None)
55    def __new__(cls, string: str, attrs: dict = None):
56        """
57        Allows string to have attributes.
58        """
59        obj = str.__new__(cls, string)
60        if attrs:
61            for k, v in attrs.items():
62                setattr(obj, k, v)
63        return obj

Allows string to have attributes.

id: str
distance: float
metadata: dict
Inherited Members
microcore.utils.ExtendedString
to_tokens
num_tokens
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
count
expandtabs
find
partition
index
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans
class SearchResults(builtins.list):
12class SearchResults(list):
13    def fit_to_token_size(
14            self,
15            max_tokens: int,
16            for_model: str = None,
17            encoding: str | tiktoken.Encoding = None,
18            verbose=True
19    ):
20        from ..tokenizing import fit_to_token_size
21        records, removed = fit_to_token_size(self, max_tokens, for_model, encoding)
22        if verbose and len(records) < len(self):
23            logging.info(
24                "For fitting %d records to %d tokens, %d records was removed",
25                len(self),
26                max_tokens,
27                removed
28            )
29        return SearchResults(list(records))

Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.

def fit_to_token_size( self, max_tokens: int, for_model: str = None, encoding: str | tiktoken.core.Encoding = None, verbose=True):
13    def fit_to_token_size(
14            self,
15            max_tokens: int,
16            for_model: str = None,
17            encoding: str | tiktoken.Encoding = None,
18            verbose=True
19    ):
20        from ..tokenizing import fit_to_token_size
21        records, removed = fit_to_token_size(self, max_tokens, for_model, encoding)
22        if verbose and len(records) < len(self):
23            logging.info(
24                "For fitting %d records to %d tokens, %d records was removed",
25                len(self),
26                max_tokens,
27                removed
28            )
29        return SearchResults(list(records))
Inherited Members
builtins.list
list
clear
copy
append
insert
extend
pop
remove
index
count
reverse
sort
def dedent(text: str) -> str:
286def dedent(text: str) -> str:
287    """
288    Removes minimal shared leading whitespace from each line
289    and strips leading and trailing empty lines.
290    """
291    lines = text.splitlines()
292    while lines and lines[0].strip() == "":
293        lines.pop(0)
294    while lines and lines[-1].strip() == "":
295        lines.pop()
296    non_empty_lines = [line for line in lines if line.strip()]
297    if non_empty_lines:
298        min_indent = min((len(line) - len(line.lstrip())) for line in non_empty_lines)
299        dedented_lines = [
300            line[min_indent:] if line and len(line) >= min_indent else line
301            for line in lines
302        ]
303    else:
304        dedented_lines = lines
305    return "\n".join(dedented_lines)

Removes minimal shared leading whitespace from each line and strips leading and trailing empty lines.

@dataclass
class Config(microcore.configuration.LLMConfig):
342@dataclass
343class Config(LLMConfig):
344    """MicroCore configuration"""
345
346    USE_LOGGING: bool = from_env(default=False)
347    """Whether to use logging or not, see `microcore.use_logging`"""
348
349    PROMPT_TEMPLATES_PATH: str | Path = from_env("tpl")
350    """Path to the folder with prompt templates, ./tpl by default"""
351
352    STORAGE_PATH: str | Path = from_env("storage")
353    """Path to the folder with file storage, ./storage by default"""
354
355    STORAGE_DEFAULT_FILE_EXT: str = from_env(default="")
356
357    EMBEDDING_DB_FOLDER: str = from_env(default="embedding_db")
358    """Folder within microcore.config.Config.STORAGE_PATH for storing embeddings"""
359
360    EMBEDDING_DB_FUNCTION: Any = from_env()
361
362    EMBEDDING_DB_ALLOW_DUPLICATES: bool = from_env(dtype=bool, default=False)
363
364    DEFAULT_ENCODING: str = from_env("utf-8")
365    """Used in file system operations, utf-8 by default"""
366
367    JINJA2_AUTO_ESCAPE: bool = from_env(dtype=bool, default=False)
368
369    ELEVENLABS_API_KEY: str = from_env()
370
371    TEXT_TO_SPEECH_PATH: str | Path = from_env()
372    """Path to the folder with generated voice files"""
373
374    MAX_CONCURRENT_TASKS: int = from_env(default=None)
375
376    def __post_init__(self):
377        super().__post_init__()
378        if self.TEXT_TO_SPEECH_PATH is None:
379            self.TEXT_TO_SPEECH_PATH = Path(self.STORAGE_PATH) / "voicing"

MicroCore configuration

Config( GOOGLE_VERTEX_ACCESS_TOKEN: str = <object object>, GOOGLE_VERTEX_PROJECT_ID: str = <object object>, GOOGLE_VERTEX_LOCATION: str = <object object>, GOOGLE_VERTEX_GCLOUD_AUTH: bool = <object object>, GOOGLE_VERTEX_RESPONSE_VALIDATION: bool = <object object>, GOOGLE_GEMINI_SAFETY_SETTINGS: dict = <object object>, ANTHROPIC_API_KEY: str = <object object>, OPENAI_API_TYPE: str = <object object>, OPENAI_API_KEY: str = <object object>, OPENAI_API_BASE: str = <object object>, OPENAI_API_VERSION: str = <object object>, USE_DOT_ENV: bool = None, DOT_ENV_FILE: str | pathlib.Path = None, LLM_API_TYPE: str = <object object>, LLM_API_KEY: str = <object object>, LLM_API_BASE: str = <object object>, LLM_API_VERSION: str = <object object>, LLM_DEPLOYMENT_ID: str = <object object>, MODEL: str = <object object>, TIKTOKEN_ENCODING: str = <object object>, LLM_DEFAULT_ARGS: dict = <object object>, AZURE_DEPLOYMENT_ID: str = <object object>, INFERENCE_FUNC: Union[Callable, str] = <object object>, CHAT_MODE: bool = <object object>, INIT_PARAMS: dict = <object object>, USE_LOGGING: bool = <object object>, PROMPT_TEMPLATES_PATH: str | pathlib.Path = <object object>, STORAGE_PATH: str | pathlib.Path = <object object>, STORAGE_DEFAULT_FILE_EXT: str = <object object>, EMBEDDING_DB_FOLDER: str = <object object>, EMBEDDING_DB_FUNCTION: Any = <object object>, EMBEDDING_DB_ALLOW_DUPLICATES: bool = <object object>, DEFAULT_ENCODING: str = <object object>, JINJA2_AUTO_ESCAPE: bool = <object object>, ELEVENLABS_API_KEY: str = <object object>, TEXT_TO_SPEECH_PATH: str | pathlib.Path = <object object>, MAX_CONCURRENT_TASKS: int = <object object>)
USE_LOGGING: bool = <object object>

Whether to use logging or not, see use_logging

PROMPT_TEMPLATES_PATH: str | pathlib.Path = <object object>

Path to the folder with prompt templates, ./tpl by default

STORAGE_PATH: str | pathlib.Path = <object object>

Path to the folder with file storage, ./storage by default

STORAGE_DEFAULT_FILE_EXT: str = <object object>
EMBEDDING_DB_FOLDER: str = <object object>

Folder within Config.STORAGE_PATH for storing embeddings

EMBEDDING_DB_FUNCTION: Any = <object object>
EMBEDDING_DB_ALLOW_DUPLICATES: bool = <object object>
DEFAULT_ENCODING: str = <object object>

Used in file system operations, utf-8 by default

JINJA2_AUTO_ESCAPE: bool = <object object>
ELEVENLABS_API_KEY: str = <object object>
TEXT_TO_SPEECH_PATH: str | pathlib.Path = <object object>

Path to the folder with generated voice files

MAX_CONCURRENT_TASKS: int = <object object>
class Metrics:
 8class Metrics:
 9    def __init__(self):
10        self._start: float = 0
11        self.exec_duration: float = 0
12        self.total_gen_duration: float = 0
13        self.requests_count: int = 0
14        self.succ_requests_count: int = 0
15        self.gen_chars_count: int = 0
16        self.avg_gen_duration: float = 0
17        self.gen_chars_speed: float = 0
18
19    def __enter__(self):
20        self._start = time.time()
21
22        env().llm_before_handlers.append(self._before_llm)
23        env().llm_after_handlers.append(self._after_llm)
24        return self
25
26    def __exit__(self, exc_type, exc_value, traceback):
27        self.exec_duration = time.time() - self._start
28        env().llm_before_handlers.remove(self._before_llm)
29        env().llm_after_handlers.remove(self._after_llm)
30
31    def _before_llm(self, prompt, **kwargs):  # pylint: disable=unused-argument
32        self.requests_count += 1
33
34    def _after_llm(self, response: str | LLMResponse):
35        self.succ_requests_count += 1
36        self.gen_chars_count += len(response) if isinstance(response, str) else 0
37        self.total_gen_duration += (
38            response.gen_duration if isinstance(response, LLMResponse) else 0
39        )
40        self.avg_gen_duration = self.total_gen_duration / self.succ_requests_count
41        self.gen_chars_speed = (self.gen_chars_count or 1) / (
42            self.total_gen_duration or 1
43        )
exec_duration: float
total_gen_duration: float
requests_count: int
succ_requests_count: int
gen_chars_count: int
avg_gen_duration: float
gen_chars_speed: float