microcore
Minimalistic Foundation for AI Applications
microcore is a collection of Python adapters for Large Language Models and Semantic Search APIs that allow you to communicate with these services in a convenient way, make them easily switchable, and separate business logic from implementation details.
1""" 2# Minimalistic Foundation for AI Applications 3 4**microcore** is a collection of python adapters for Large Language Models 5and Semantic Search APIs allowing to 6communicate with these services convenient way, make it easily switchable 7and separate business logic from implementation details. 8""" 9 10import os 11import microcore.ui # noqa 12from .embedding_db import SearchResult, AbstractEmbeddingDB, SearchResults 13from .file_storage import storage 14from ._env import configure, env, config 15from .logging import use_logging 16from .message_types import UserMsg, AssistantMsg, SysMsg, Msg, PartialMsg 17from .configuration import ApiType, LLMConfigError, Config 18from .types import BadAIJsonAnswer, BadAIAnswer 19from .wrappers.prompt_wrapper import PromptWrapper 20from .wrappers.llm_response_wrapper import LLMResponse 21from ._llm_functions import llm, allm, llm_parallel 22from .utils import parse, dedent 23from .metrics import Metrics 24 25 26def tpl(file: os.PathLike[str] | str, **kwargs) -> str | PromptWrapper: 27 """Renders a prompt template using the provided parameters.""" 28 return PromptWrapper(env().tpl_function(file, **kwargs), kwargs) 29 30 31def prompt(template_str: str, remove_indent=True, **kwargs) -> str | PromptWrapper: 32 """Renders a prompt template from string using the provided parameters.""" 33 if remove_indent: 34 template_str = dedent(template_str) 35 return PromptWrapper( 36 env().jinja_env.from_string(template_str).render(**kwargs), kwargs 37 ) 38 39 40fmt = prompt 41 42 43def use_model(name: str): 44 """Switches language model""" 45 config().MODEL = name 46 config().LLM_DEFAULT_ARGS["model"] = name 47 48 49def validate_config(): 50 """ 51 Validates current MicroCore configuration 52 53 Raises: 54 `LLMConfigError` if configuration is invalid 55 """ 56 config().validate() 57 58 59class _EmbeddingDBProxy(AbstractEmbeddingDB): 60 def get_all(self, collection: str) -> list[str | SearchResult]: 61 return env().texts.get_all(collection) 62 
63 def search( 64 self, 65 collection: str, 66 query: str | list, 67 n_results: int = 5, 68 where: dict = None, 69 **kwargs, 70 ) -> SearchResults | list[str | SearchResult]: 71 return env().texts.search(collection, query, n_results, where, **kwargs) 72 73 def find(self, *args, **kwargs) -> SearchResults | list[str | SearchResult]: 74 return self.search(*args, **kwargs) 75 76 def find_all( 77 self, 78 collection: str, 79 query: str | list, 80 where: dict = None, 81 **kwargs, 82 ) -> SearchResults | list[str | SearchResult]: 83 return env().texts.find_all(collection, query, where, **kwargs) 84 85 def save_many(self, collection: str, items: list[tuple[str, dict] | str]): 86 return env().texts.save_many(collection, items) 87 88 def save(self, collection: str, text: str, metadata: dict = None): 89 return env().texts.save(collection, text, metadata) 90 91 def clear(self, collection: str): 92 return env().texts.clear(collection) 93 94 def count(self, collection: str) -> int: 95 return env().texts.count(collection) 96 97 def delete(self, collection: str, what: str | list[str] | dict): 98 return env().texts.delete(collection, what) 99 100 101texts = _EmbeddingDBProxy() 102"""Embedding database, see `microcore.embedding_db.AbstractEmbeddingDB`""" 103 104__all__ = [ 105 "llm", 106 "allm", 107 "llm_parallel", 108 "tpl", 109 "prompt", 110 "fmt", 111 "texts", 112 "configure", 113 "validate_config", 114 "storage", 115 "use_model", 116 "use_logging", 117 "env", 118 "config", 119 "Msg", 120 "UserMsg", 121 "SysMsg", 122 "AssistantMsg", 123 "PartialMsg", 124 "ApiType", 125 "BadAIJsonAnswer", 126 "BadAIAnswer", 127 "LLMConfigError", 128 "LLMResponse", 129 "PromptWrapper", 130 "parse", 131 "SearchResult", 132 "SearchResults", 133 "dedent", 134 # submodules 135 "embedding_db", 136 "file_storage", 137 "message_types", 138 "utils", 139 "configuration", 140 "Config", 141 "types", 142 "ui", 143 "Metrics", 144 # "wrappers", 145] 146 147__version__ = "3.10.0"
10def llm(prompt: str | Msg | list[str] | list[Msg], **kwargs) -> str | LLMResponse: 11 """ 12 Request Large Language Model synchronously 13 14 Args: 15 prompt (str | list[str]): Text to send to LLM 16 **kwargs (dict): Parameters supported by the LLM API 17 18 See parameters supported by the OpenAI: 19 20 - https://platform.openai.com/docs/api-reference/completions/create 21 - https://platform.openai.com/docs/api-reference/chat/create 22 23 **Additional parameters:** 24 25 - callback: callable - callback function 26 to be called on each chunk of text, 27 enables response streaming if supported by the LLM API 28 - callbacks: list[callable] - collection of callbacks 29 to be called on each chunk of text, 30 enables response streaming if supported by the LLM API 31 32 Returns: 33 34 Text generated by the LLM as string 35 with all fields returned by API accessible as an attributes. 36 37 See fields returned by the OpenAI: 38 39 - https://platform.openai.com/docs/api-reference/completions/object 40 - https://platform.openai.com/docs/api-reference/chat/object 41 """ 42 [h(prompt, **kwargs) for h in env().llm_before_handlers] 43 start = datetime.now() 44 response = env().llm_function(prompt, **kwargs) 45 try: 46 response.gen_duration = (datetime.now() - start).total_seconds() 47 except AttributeError: 48 ... 49 [h(response) for h in env().llm_after_handlers] 50 return response
Request Large Language Model synchronously
Arguments:
- prompt (str | list[str]): Text to send to LLM
**kwargs (dict): Parameters supported by the LLM API
See parameters supported by the OpenAI:
- https://platform.openai.com/docs/api-reference/completions/create
- https://platform.openai.com/docs/api-reference/chat/create
Additional parameters:
- callback: callable - callback function to be called on each chunk of text, enables response streaming if supported by the LLM API
- callbacks: list[callable] - collection of callbacks to be called on each chunk of text, enables response streaming if supported by the LLM API
Returns:
Text generated by the LLM as a string, with all fields returned by the API accessible as attributes.
See fields returned by the OpenAI:
53async def allm( 54 prompt: str | Msg | list[str] | list[Msg], **kwargs 55) -> str | LLMResponse: 56 """ 57 Request Large Language Model asynchronously 58 59 Args: 60 prompt (str | list[str]): Text to send to LLM 61 **kwargs (dict): Parameters supported by the LLM API 62 63 See parameters supported by the OpenAI: 64 65 - https://platform.openai.com/docs/api-reference/completions/create 66 - https://platform.openai.com/docs/api-reference/chat/create 67 68 **Additional parameters:** 69 70 - callback: callable - callback function 71 to be called on each chunk of text, 72 enables response streaming if supported by the LLM API 73 - callbacks: list[callable] - collection of callbacks 74 to be called on each chunk of text, 75 enables response streaming if supported by the LLM API 76 77 Note: async callbacks are supported only for async LLM API calls 78 79 Returns: 80 81 Text generated by the LLM as string 82 with all fields returned by API accessible as an attributes. 83 84 See fields returned by the OpenAI: 85 86 - https://platform.openai.com/docs/api-reference/completions/object 87 - https://platform.openai.com/docs/api-reference/chat/object 88 """ 89 [h(prompt, **kwargs) for h in env().llm_before_handlers] 90 start = datetime.now() 91 response = await env().llm_async_function(prompt, **kwargs) 92 try: 93 response.gen_duration = (datetime.now() - start).total_seconds() 94 except AttributeError: 95 ... 96 [h(response) for h in env().llm_after_handlers] 97 return response
Request Large Language Model asynchronously
Arguments:
- prompt (str | list[str]): Text to send to LLM
**kwargs (dict): Parameters supported by the LLM API
See parameters supported by the OpenAI:
- https://platform.openai.com/docs/api-reference/completions/create
- https://platform.openai.com/docs/api-reference/chat/create
Additional parameters:
- callback: callable - callback function to be called on each chunk of text, enables response streaming if supported by the LLM API
- callbacks: list[callable] - collection of callbacks to be called on each chunk of text, enables response streaming if supported by the LLM API
Note: async callbacks are supported only for async LLM API calls
Returns:
Text generated by the LLM as a string, with all fields returned by the API accessible as attributes.
See fields returned by the OpenAI:
100async def llm_parallel( 101 prompts: list, max_concurrent_tasks: int = None, **kwargs 102) -> list[str] | list[LLMResponse]: 103 tasks = [allm(prompt, **kwargs) for prompt in prompts] 104 105 if max_concurrent_tasks is None: 106 max_concurrent_tasks = int(env().config.MAX_CONCURRENT_TASKS) 107 if not max_concurrent_tasks: 108 max_concurrent_tasks = len(tasks) 109 110 return await run_parallel(tasks, max_concurrent_tasks=max_concurrent_tasks)
27def tpl(file: os.PathLike[str] | str, **kwargs) -> str | PromptWrapper: 28 """Renders a prompt template using the provided parameters.""" 29 return PromptWrapper(env().tpl_function(file, **kwargs), kwargs)
Renders a prompt template using the provided parameters.
32def prompt(template_str: str, remove_indent=True, **kwargs) -> str | PromptWrapper: 33 """Renders a prompt template from string using the provided parameters.""" 34 if remove_indent: 35 template_str = dedent(template_str) 36 return PromptWrapper( 37 env().jinja_env.from_string(template_str).render(**kwargs), kwargs 38 )
Renders a prompt template from string using the provided parameters.
32def prompt(template_str: str, remove_indent=True, **kwargs) -> str | PromptWrapper: 33 """Renders a prompt template from string using the provided parameters.""" 34 if remove_indent: 35 template_str = dedent(template_str) 36 return PromptWrapper( 37 env().jinja_env.from_string(template_str).render(**kwargs), kwargs 38 )
Renders a prompt template from string using the provided parameters.
Embedding database, see microcore.embedding_db.AbstractEmbeddingDB
154 def _config_builder_wrapper(cfg: Config | dict | str = None, **kwargs): 155 """ 156 - Convert configuration keys to uppercase 157 - Add LLM_ prefix to keys if necessary 158 - Allow to configure from Config instance or dictionary 159 """ 160 if cfg: 161 assert not kwargs, "Cannot pass both cfg and kwargs" 162 if isinstance(cfg, dict): 163 return _config_builder_wrapper(**cfg) 164 if isinstance(cfg, str): 165 if not os.path.isfile(cfg): 166 raise LLMConfigError(f"Configuration file not found: {cfg}") 167 return _config_builder_wrapper(Config(USE_DOT_ENV=True, DOT_ENV_FILE=cfg)) 168 kwargs = {str(k).upper(): v for k, v in kwargs.items()} 169 for k in list(kwargs.keys()): 170 if not hasattr(Config, k) and ( 171 hasattr(Config, key := f"LLM_{k}") or key in _fields 172 ): 173 kwargs[key] = kwargs.pop(k) 174 return _Configure(**(cfg and asdict(cfg) or kwargs))
- Convert configuration keys to uppercase
- Add LLM_ prefix to keys if necessary
- Allow to configure from Config instance or dictionary
44def use_model(name: str): 45 """Switches language model""" 46 config().MODEL = name 47 config().LLM_DEFAULT_ARGS["model"] = name
Switches language model
65def use_logging(): 66 """Turns on logging of LLM requests and responses to console.""" 67 if not is_notebook(): 68 init(autoreset=True) 69 if _log_request not in env().llm_before_handlers: 70 env().llm_before_handlers.append(_log_request) 71 if _log_response not in env().llm_after_handlers: 72 env().llm_after_handlers.append(_log_response)
Turns on logging of LLM requests and responses to console.
Returns the current MicroCore environment
Resolve current configuration
Inherited Members
Inherited Members
Inherited Members
45class PartialMsg(AssistantMsg): 46 """A message that is not fully formed yet.""" 47 48 class _PartialMsgDict(dict): 49 is_partial = True 50 """Custom dictionary class to handle additional properties""" 51 52 dict_factory = _PartialMsgDict 53 placeholder = "<|placeholder|>" 54 variants_splitter = "<|or|>" 55 56 @staticmethod 57 def split_prefix_and_suffixes(content: str): 58 parts = content.split(PartialMsg.placeholder) 59 prefix = parts[0] 60 suffix = parts[1] if len(parts) > 1 else "" 61 suffixes = suffix.split(PartialMsg.variants_splitter) if suffix else [] 62 return prefix, suffixes 63 64 def prefix_and_suffixes(self): 65 return self.split_prefix_and_suffixes(self.content) 66 67 def prefix(self): 68 prefix, _ = self.prefix_and_suffixes() 69 return prefix 70 71 def suffixes(self): 72 _, suffixes = self.prefix_and_suffixes() 73 return suffixes
A message that is not fully formed yet.
Inherited Members
60class ApiType: 61 """LLM API types""" 62 63 OPEN_AI = "open_ai" 64 AZURE = "azure" 65 """See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models""" 66 ANYSCALE = "anyscale" 67 """See https://www.anyscale.com/endpoints""" 68 DEEP_INFRA = "deep_infra" 69 """List of text generation models: https://deepinfra.com/models?type=text-generation""" 70 ANTHROPIC = "anthropic" 71 GOOGLE_VERTEX_AI = "google_vertex_ai" 72 GOOGLE_AI_STUDIO = "google_ai_studio" 73 74 # Local models 75 FUNCTION = "function" 76 TRANSFORMERS = "transformers" 77 NONE = "none" 78 79 @staticmethod 80 def is_local(api_type: str) -> bool: 81 return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
LLM API types
List of text generation models: https://deepinfra.com/models?type=text-generation
25class BadAIJsonAnswer(BadAIAnswer): 26 def __init__( 27 self, message: str = "Invalid JSON generated by the LLM", details=None 28 ): 29 super().__init__(message, details)
Unprocessable response generated by the LLM
Inherited Members
- builtins.BaseException
- with_traceback
- add_note
- args
13class BadAIAnswer(ValueError): 14 """Unprocessable response generated by the LLM""" 15 16 def __init__(self, message: str = None, details: str = None): 17 self.message = str(message or "Unprocessable response generated by the LLM") 18 self.details = details 19 super().__init__(self.message + (f": {self.details}" if self.details else "")) 20 21 def __str__(self): 22 return self.message + (f": {self.details}" if self.details else "")
Unprocessable response generated by the LLM
Inherited Members
- builtins.BaseException
- with_traceback
- add_note
- args
LLM configuration error
Inherited Members
- builtins.ValueError
- ValueError
- builtins.BaseException
- with_traceback
- add_note
- args
10class LLMResponse(ExtendedString, ConvertableToMessage): 11 """ 12 Response from the Large Language Model. 13 14 If treated as a string, it returns the text generated by the LLM. 15 16 Also, it contains all fields returned by the API accessible as an attributes. 17 18 See fields returned by the OpenAI: 19 20 - https://platform.openai.com/docs/api-reference/completions/object 21 - https://platform.openai.com/docs/api-reference/chat/object 22 """ 23 24 def __new__(cls, string: str, attrs: dict = None): 25 attrs = { 26 **(attrs or {}), 27 "role": Role.ASSISTANT, 28 "content": str(string), 29 # generation duration in seconds (float), used in metrics 30 "gen_duration": None, 31 } 32 obj = ExtendedString.__new__(cls, string, attrs) 33 return obj 34 35 def parse_json( 36 self, raise_errors: bool = True, required_fields: list[str] = None 37 ) -> list | dict | float | int | str: 38 return parse_json(self.content, raise_errors, required_fields) 39 40 def parse_number( 41 self, 42 default=BadAIAnswer, 43 position="last", 44 dtype: type | str = float, 45 rounding: bool = False, 46 ) -> int | float | Any: 47 return extract_number(self.content, default, position, dtype, rounding) 48 49 def as_message(self) -> AssistantMsg: 50 return self.as_assistant
Response from the Large Language Model.
If treated as a string, it returns the text generated by the LLM.
Also, it contains all fields returned by the API, accessible as attributes.
See fields returned by the OpenAI:
Inherited Members
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- count
- expandtabs
- find
- partition
- index
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
6class PromptWrapper(ExtendedString, ConvertableToMessage): 7 def to_llm(self, **kwargs): 8 """ 9 Send prompt to Large Language Model, see `llm` 10 """ 11 return llm(self, **kwargs) 12 13 async def to_allm(self, **kwargs): 14 """ 15 Send prompt to Large Language Model asynchronously, see `allm` 16 """ 17 return await allm(self, **kwargs)
Provides a way of extending string with attributes and methods
55 def __new__(cls, string: str, attrs: dict = None): 56 """ 57 Allows string to have attributes. 58 """ 59 obj = str.__new__(cls, string) 60 if attrs: 61 for k, v in attrs.items(): 62 setattr(obj, k, v) 63 return obj
Allows string to have attributes.
7 def to_llm(self, **kwargs): 8 """ 9 Send prompt to Large Language Model, see `llm` 10 """ 11 return llm(self, **kwargs)
Send prompt to Large Language Model, see llm
13 async def to_allm(self, **kwargs): 14 """ 15 Send prompt to Large Language Model asynchronously, see `allm` 16 """ 17 return await allm(self, **kwargs)
Send prompt to Large Language Model asynchronously, see allm
Inherited Members
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- count
- expandtabs
- find
- partition
- index
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
116def parse( 117 text: str, field_format: str = r"\[\[(.*?)\]\]", required_fields: list = None 118) -> dict: 119 """ 120 Parse a document divided into sections and convert it into a dictionary. 121 """ 122 pattern = rf"{field_format}\n(.*?)(?=\n{field_format}|$)" 123 matches = re.findall(pattern, text, re.DOTALL) 124 result = {key.strip().lower(): value for key, value, _ in matches} 125 if required_fields: 126 for field in required_fields: 127 if field not in result: 128 raise BadAIAnswer(f"Field '{field}' is required but not found") 129 return result
Parse a document divided into sections and convert it into a dictionary.
32class SearchResult(ExtendedString): 33 """ 34 String containing the search result with additional information in attributes 35 36 Attributes: 37 id (str): document (text) identifier in embedding database 38 distance (float): The distance between the query and the search result 39 metadata (dict): A dictionary containing document metadata 40 """ 41 42 id: str 43 distance: float 44 metadata: dict
String containing the search result with additional information in attributes
Attributes:
- id (str): document (text) identifier in embedding database
- distance (float): The distance between the query and the search result
- metadata (dict): A dictionary containing document metadata
55 def __new__(cls, string: str, attrs: dict = None): 56 """ 57 Allows string to have attributes. 58 """ 59 obj = str.__new__(cls, string) 60 if attrs: 61 for k, v in attrs.items(): 62 setattr(obj, k, v) 63 return obj
Allows string to have attributes.
Inherited Members
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- count
- expandtabs
- find
- partition
- index
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
12class SearchResults(list): 13 def fit_to_token_size( 14 self, 15 max_tokens: int, 16 for_model: str = None, 17 encoding: str | tiktoken.Encoding = None, 18 verbose=True 19 ): 20 from ..tokenizing import fit_to_token_size 21 records, removed = fit_to_token_size(self, max_tokens, for_model, encoding) 22 if verbose and len(records) < len(self): 23 logging.info( 24 "For fitting %d records to %d tokens, %d records was removed", 25 len(self), 26 max_tokens, 27 removed 28 ) 29 return SearchResults(list(records))
Built-in mutable sequence.
If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.
13 def fit_to_token_size( 14 self, 15 max_tokens: int, 16 for_model: str = None, 17 encoding: str | tiktoken.Encoding = None, 18 verbose=True 19 ): 20 from ..tokenizing import fit_to_token_size 21 records, removed = fit_to_token_size(self, max_tokens, for_model, encoding) 22 if verbose and len(records) < len(self): 23 logging.info( 24 "For fitting %d records to %d tokens, %d records was removed", 25 len(self), 26 max_tokens, 27 removed 28 ) 29 return SearchResults(list(records))
Inherited Members
- builtins.list
- list
- clear
- copy
- append
- insert
- extend
- pop
- remove
- index
- count
- reverse
- sort
286def dedent(text: str) -> str: 287 """ 288 Removes minimal shared leading whitespace from each line 289 and strips leading and trailing empty lines. 290 """ 291 lines = text.splitlines() 292 while lines and lines[0].strip() == "": 293 lines.pop(0) 294 while lines and lines[-1].strip() == "": 295 lines.pop() 296 non_empty_lines = [line for line in lines if line.strip()] 297 if non_empty_lines: 298 min_indent = min((len(line) - len(line.lstrip())) for line in non_empty_lines) 299 dedented_lines = [ 300 line[min_indent:] if line and len(line) >= min_indent else line 301 for line in lines 302 ] 303 else: 304 dedented_lines = lines 305 return "\n".join(dedented_lines)
Removes minimal shared leading whitespace from each line and strips leading and trailing empty lines.
342@dataclass 343class Config(LLMConfig): 344 """MicroCore configuration""" 345 346 USE_LOGGING: bool = from_env(default=False) 347 """Whether to use logging or not, see `microcore.use_logging`""" 348 349 PROMPT_TEMPLATES_PATH: str | Path = from_env("tpl") 350 """Path to the folder with prompt templates, ./tpl by default""" 351 352 STORAGE_PATH: str | Path = from_env("storage") 353 """Path to the folder with file storage, ./storage by default""" 354 355 STORAGE_DEFAULT_FILE_EXT: str = from_env(default="") 356 357 EMBEDDING_DB_FOLDER: str = from_env(default="embedding_db") 358 """Folder within microcore.config.Config.STORAGE_PATH for storing embeddings""" 359 360 EMBEDDING_DB_FUNCTION: Any = from_env() 361 362 EMBEDDING_DB_ALLOW_DUPLICATES: bool = from_env(dtype=bool, default=False) 363 364 DEFAULT_ENCODING: str = from_env("utf-8") 365 """Used in file system operations, utf-8 by default""" 366 367 JINJA2_AUTO_ESCAPE: bool = from_env(dtype=bool, default=False) 368 369 ELEVENLABS_API_KEY: str = from_env() 370 371 TEXT_TO_SPEECH_PATH: str | Path = from_env() 372 """Path to the folder with generated voice files""" 373 374 MAX_CONCURRENT_TASKS: int = from_env(default=None) 375 376 def __post_init__(self): 377 super().__post_init__() 378 if self.TEXT_TO_SPEECH_PATH is None: 379 self.TEXT_TO_SPEECH_PATH = Path(self.STORAGE_PATH) / "voicing"
MicroCore configuration
Path to the folder with prompt templates, ./tpl by default
Path to the folder with file storage, ./storage by default
Path to the folder with generated voice files
Inherited Members
8class Metrics: 9 def __init__(self): 10 self._start: float = 0 11 self.exec_duration: float = 0 12 self.total_gen_duration: float = 0 13 self.requests_count: int = 0 14 self.succ_requests_count: int = 0 15 self.gen_chars_count: int = 0 16 self.avg_gen_duration: float = 0 17 self.gen_chars_speed: float = 0 18 19 def __enter__(self): 20 self._start = time.time() 21 22 env().llm_before_handlers.append(self._before_llm) 23 env().llm_after_handlers.append(self._after_llm) 24 return self 25 26 def __exit__(self, exc_type, exc_value, traceback): 27 self.exec_duration = time.time() - self._start 28 env().llm_before_handlers.remove(self._before_llm) 29 env().llm_after_handlers.remove(self._after_llm) 30 31 def _before_llm(self, prompt, **kwargs): # pylint: disable=unused-argument 32 self.requests_count += 1 33 34 def _after_llm(self, response: str | LLMResponse): 35 self.succ_requests_count += 1 36 self.gen_chars_count += len(response) if isinstance(response, str) else 0 37 self.total_gen_duration += ( 38 response.gen_duration if isinstance(response, LLMResponse) else 0 39 ) 40 self.avg_gen_duration = self.total_gen_duration / self.succ_requests_count 41 self.gen_chars_speed = (self.gen_chars_count or 1) / ( 42 self.total_gen_duration or 1 43 )