microcore.configuration

  1import json
  2import logging
  3import os
  4from dataclasses import dataclass, field, fields
  5from pathlib import Path
  6from typing import Any, Union, Callable
  7
  8import dotenv
  9from colorama import Fore
 10
# Sentinel distinguishing "value not provided" from an explicit None
# in from_env() fields and os.getenv() lookups.
_MISSING = object()

# Environment variable values (upper-cased) that are interpreted as boolean True.
TRUE_VALUES = ["1", "TRUE", "YES", "ON", "ENABLED", "Y", "+"]
"""@private"""
 15
 16
 17def from_env(default=None, dtype=None):
 18    """
 19    Provides default value for the configuration dataclass
 20    from the environment variable with the name equal to field name in upper case"""
 21    return field(
 22        default=_MISSING, metadata=dict(_from_env=True, _default=default, _dtype=dtype)
 23    )
 24
 25
 26def get_bool_from_env(env_var: str, default: bool | None = False) -> bool | None:
 27    """Convert value of environment variable to boolean"""
 28    if env_var not in os.environ:
 29        return default
 30    return os.getenv(env_var, str(default)).upper() in TRUE_VALUES
 31
 32
 33def get_object_from_env(env_var: str, dtype: type, default: Any = None):
 34    val_from_env = os.getenv(  # pylint: disable=W1508
 35        env_var, _MISSING
 36    )
 37    if isinstance(val_from_env, str):
 38        val_from_env = val_from_env.strip()
 39        if val_from_env:
 40            try:
 41                val_from_env = json.loads(val_from_env.strip())
 42                assert isinstance(
 43                    val_from_env, dtype
 44                ), f"Expected {dtype.__name__}, got {type(val_from_env).__name__}"
 45            except (json.JSONDecodeError, AssertionError) as e:
 46                raise LLMConfigError(
 47                    f"Invalid value in environment variable '{env_var}'. "
 48                    f"Expected: JSON {dtype.__name__}, received: '{val_from_env}'"
 49                ) from e
 50        else:
 51            val_from_env = _MISSING
 52    if val_from_env is _MISSING:
 53        if default is None:  # instead of default factory
 54            default = dtype()
 55        val_from_env = default
 56    return val_from_env
 57
 58
 59class ApiType:
 60    """LLM API types"""
 61
 62    OPEN_AI = "open_ai"
 63    AZURE = "azure"
 64    """See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models"""
 65    ANYSCALE = "anyscale"
 66    """See https://www.anyscale.com/endpoints"""
 67    DEEP_INFRA = "deep_infra"
 68    """List of text generation models: https://deepinfra.com/models?type=text-generation"""
 69    ANTHROPIC = "anthropic"
 70    GOOGLE_VERTEX_AI = "google_vertex_ai"
 71    GOOGLE_AI_STUDIO = "google_ai_studio"
 72
 73    # Local models
 74    FUNCTION = "function"
 75    TRANSFORMERS = "transformers"
 76    NONE = "none"
 77
 78    @staticmethod
 79    def is_local(api_type: str) -> bool:
 80        return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
 81
 82
# Process-wide flag: True once the default .env file has been loaded,
# so repeated config instantiations do not reload it.
_default_dotenv_loaded = False
 84
 85
 86@dataclass
 87class BaseConfig:
 88    """Base class for configuration dataclasses"""
 89
 90    USE_DOT_ENV: bool = None
 91    DOT_ENV_FILE: str | Path = None
 92
 93    def __post_init__(self):
 94        self._dot_env_setup()
 95        self._update_from_env()
 96
 97    def _dot_env_setup(self):
 98        global _default_dotenv_loaded
 99
100        if self.USE_DOT_ENV is None:
101            self.USE_DOT_ENV = get_bool_from_env("USE_DOT_ENV", True)
102
103        if self.USE_DOT_ENV:
104            if self.DOT_ENV_FILE or not _default_dotenv_loaded:
105                dotenv.load_dotenv(override=True, dotenv_path=self.DOT_ENV_FILE)
106            if not self.DOT_ENV_FILE:
107                _default_dotenv_loaded = True
108
109    def _update_from_env(self):
110        for f in fields(self):
111            if f.metadata.get("_from_env") and getattr(self, f.name) is _MISSING:
112                env_name = f.name.upper()
113                default = f.metadata["_default"]
114                dtype = f.metadata.get("_dtype")
115                if dtype is bool:
116                    val_from_env = get_bool_from_env(env_name, default)
117                elif dtype in [dict, list]:
118                    val_from_env = get_object_from_env(env_name, dtype, default)
119                else:
120                    val_from_env = os.getenv(env_name, default)
121
122                setattr(self, f.name, val_from_env)
123
124    def __iter__(self):
125        for f in fields(self):
126            value = getattr(self, f.name)
127            yield f.name, value
128
129
@dataclass
class _OpenAIEnvVars:
    """Mixin exposing the OS environment variables expected by the OpenAI library."""

    # OS Environment variables expected by OpenAI library
    # Will be used as defaults for LLM
    # @todo: implement lib_defaults to take default values from openai lib if available
    OPENAI_API_TYPE: str = from_env(ApiType.OPEN_AI)
    OPENAI_API_KEY: str = from_env()
    OPENAI_API_BASE: str = from_env("https://api.openai.com/v1")
    OPENAI_API_VERSION: str = from_env()
139
140
@dataclass
class _AnthropicEnvVars:
    """Mixin exposing the Anthropic API environment variable."""

    # Used as default for LLM_API_KEY when LLM_API_TYPE is ANTHROPIC
    ANTHROPIC_API_KEY: str = from_env()
144
145
@dataclass
class _GoogleVertexAiEnvVars:
    """Mixin exposing Google Vertex AI / Gemini environment variables."""

    GOOGLE_VERTEX_ACCESS_TOKEN: str = from_env()
    GOOGLE_VERTEX_PROJECT_ID: str = from_env()
    GOOGLE_VERTEX_LOCATION: str = from_env()
    # When None, a default is derived in LLMConfig._init_llm_options
    # (enabled iff no access token was provided)
    GOOGLE_VERTEX_GCLOUD_AUTH: bool = from_env(dtype=bool)

    GOOGLE_VERTEX_RESPONSE_VALIDATION: bool = from_env(dtype=bool, default=False)
    # JSON dict read from the environment variable of the same name
    GOOGLE_GEMINI_SAFETY_SETTINGS: dict = from_env(dtype=dict)
155
156
@dataclass
class LLMConfig(BaseConfig, _OpenAIEnvVars, _AnthropicEnvVars, _GoogleVertexAiEnvVars):
    """LLM configuration"""

    LLM_API_TYPE: str = from_env()
    """
    See `ApiType`.
    To use services that are not listed in `ApiType`
    but provide an OpenAI-compatible interface, use `ApiType.OPEN_AI`"""

    LLM_API_KEY: str = from_env()
    LLM_API_BASE: str = from_env()
    """Base URL for the LLM API, e.g. https://api.openai.com/v1"""

    LLM_API_VERSION: str = from_env()
    LLM_DEPLOYMENT_ID: str = from_env()
    """Required by `ApiType.AZURE`"""

    MODEL: str = from_env()
    """Language model name"""

    TIKTOKEN_ENCODING: str = from_env()
    """Will enforce using specific encoding for token size measurement"""

    LLM_DEFAULT_ARGS: dict = from_env(dtype=dict)
    """
    You may specify here default arguments for the LLM API calls,
    i.e. temperature, max_tokens, etc.
    """

    AZURE_DEPLOYMENT_ID: str = from_env()

    INFERENCE_FUNC: Union[Callable, str] = from_env()
    """Inference function for local models"""
    CHAT_MODE: bool = from_env(dtype=bool)
    """Is it a chat or completion model"""
    INIT_PARAMS: dict = from_env(dtype=dict)
    """Custom initialization parameters for the model"""

    def __post_init__(self):
        # Resolve env-backed fields first (BaseConfig), then derive
        # per-API-type defaults, then fail fast on an invalid setup.
        super().__post_init__()
        self._init_llm_options()
        self.validate()

    def uses_local_model(self) -> bool:
        """True when the configured API type runs models locally."""
        return ApiType.is_local(self.LLM_API_TYPE)

    def _init_llm_options(self):
        """Fill unset LLM_* options with defaults specific to the API type."""
        if self.INFERENCE_FUNC:
            # An inference function implies the FUNCTION (local) API type
            if not self.LLM_API_TYPE:
                self.LLM_API_TYPE = ApiType.FUNCTION
        if self.uses_local_model():
            # Remote-service defaults below do not apply to local models
            return

        # Use defaults from ENV variables expected by OpenAI API
        self.LLM_API_TYPE = self.LLM_API_TYPE or self.OPENAI_API_TYPE

        if self.LLM_API_TYPE == ApiType.AZURE:
            self.LLM_API_VERSION = self.LLM_API_VERSION or self.OPENAI_API_VERSION
            self.LLM_DEPLOYMENT_ID = self.LLM_DEPLOYMENT_ID or self.AZURE_DEPLOYMENT_ID
        elif self.LLM_API_TYPE == ApiType.GOOGLE_AI_STUDIO:
            self.MODEL = self.MODEL or "gemini-pro"
        elif self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI:
            self.MODEL = self.MODEL or "gemini-1.0-pro"
            if self.GOOGLE_VERTEX_GCLOUD_AUTH is None:
                # Default to gcloud auth only when no access token is provided
                self.GOOGLE_VERTEX_GCLOUD_AUTH = get_bool_from_env(
                    "GOOGLE_VERTEX_GCLOUD_AUTH", not self.GOOGLE_VERTEX_ACCESS_TOKEN
                )
        elif self.LLM_API_TYPE == ApiType.ANYSCALE:
            self.LLM_API_BASE = (
                self.LLM_API_BASE or "https://api.endpoints.anyscale.com/v1"
            )
            self.MODEL = self.MODEL or "meta-llama/Llama-2-70b-chat-hf"
        elif self.LLM_API_TYPE == ApiType.DEEP_INFRA:
            self.LLM_API_BASE = (
                self.LLM_API_BASE or "https://api.deepinfra.com/v1/openai"
            )
            self.MODEL = self.MODEL or "meta-llama/Llama-2-70b-chat-hf"
        elif self.LLM_API_TYPE == ApiType.ANTHROPIC:
            self.LLM_API_BASE = self.LLM_API_BASE or "https://api.anthropic.com/"
            self.MODEL = self.MODEL or "claude-3-opus-20240229"
            self.LLM_API_KEY = self.LLM_API_KEY or self.ANTHROPIC_API_KEY
        else:
            # Any other type (incl. OPEN_AI) falls back to the OpenAI env vars
            self.LLM_API_BASE = self.LLM_API_BASE or self.OPENAI_API_BASE
            self.LLM_API_KEY = self.LLM_API_KEY or self.OPENAI_API_KEY
            self.LLM_API_VERSION = self.LLM_API_VERSION or self.OPENAI_API_VERSION
            self.MODEL = self.MODEL or "gpt-3.5-turbo"

    def _validate_local_llm(self):
        """Validate options required for FUNCTION / TRANSFORMERS API types."""
        if self.CHAT_MODE is None:
            logging.warning(
                "When using local models, "
                "(bool)CHAT_MODE configuration option should be explicitly set"
            )
        if self.LLM_API_TYPE == ApiType.FUNCTION:
            if not self.INFERENCE_FUNC:
                raise LLMConfigError(
                    "LLM configuration error: "
                    "INFERENCE_FUNC should be provided for local models"
                )
        elif self.LLM_API_TYPE == ApiType.TRANSFORMERS:
            if not self.MODEL:
                raise LLMConfigError(
                    "LLM configuration error: "
                    "MODEL should be provided for local transformers models"
                )

    def validate(self):
        """
        Validate LLM configuration

        Raises:
            LLMConfigError
        """
        if self.LLM_API_TYPE == ApiType.NONE:
            # LLM access disabled: nothing to validate
            return
        if self.uses_local_model():
            self._validate_local_llm()
            return
        if self.INFERENCE_FUNC:
            raise LLMConfigError(
                "LLM configuration error: INFERENCE_FUNC should be provided only for local models"
            )
        if self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI:
            # Vertex AI authenticates via token or gcloud, not LLM_API_KEY
            if (
                not self.GOOGLE_VERTEX_ACCESS_TOKEN
                and not self.GOOGLE_VERTEX_GCLOUD_AUTH
            ):
                raise LLMConfigError(
                    "LLM configuration error: "
                    "GOOGLE_VERTEX_ACCESS_TOKEN should be provided "
                    "or GOOGLE_VERTEX_GCLOUD_AUTH should be enabled"
                )
        else:
            if not self.LLM_API_KEY:
                raise LLMConfigError("LLM configuration error: LLM_API_KEY is absent")
            if self.LLM_API_TYPE == ApiType.AZURE:
                if not self.LLM_API_BASE:
                    raise LLMConfigError(
                        "LLM configuration error: "
                        "LLM_API_BASE is required for using Azure models"
                    )
                if not self.LLM_DEPLOYMENT_ID:
                    raise LLMConfigError(
                        "LLM configuration error: "
                        "LLM_DEPLOYMENT_ID is required for using Azure models"
                    )
                if not self.LLM_API_VERSION:
                    raise LLMConfigError(
                        "LLM configuration error: "
                        "LLM_API_VERSION is required for using Azure models"
                    )

    def describe(self, return_dict=False):
        """
        Informal description of the configuration
        """
        # Build a reference Config against an emptied environment so only
        # values differing from the true defaults are reported.
        prev_env = os.environ.copy()
        os.environ.clear()
        # NOTE(review): instantiates the Config subclass rather than
        # type(self) — confirm this is intended when called on plain LLMConfig.
        default = Config(LLM_API_TYPE=ApiType.NONE, USE_DOT_ENV=False)
        os.environ.update(prev_env)
        data = {
            k.lower().replace("llm_", ""): v
            for k, v in dict(self).items()
            if v is not None and v != getattr(default, k) and k != "USE_DOT_ENV"
        }
        # Mask secrets: keep only a short prefix/suffix of *_key values
        for k, v in data.items():
            if "_key" in k and isinstance(v, str):
                if len(v) <= 3:
                    continue
                data[k] = v[: 1 if len(v) <= 12 else 3] + "****" + v[-2:]
        if return_dict:
            return data

        print("Config:")
        for k, v in data.items():
            print(f"  {k}: {Fore.GREEN}{v}{Fore.RESET}")
        return None
335
336
class LLMConfigError(ValueError):
    """Raised for invalid or incomplete LLM configuration (see `LLMConfig.validate`)."""
339
340
@dataclass
class Config(LLMConfig):
    """MicroCore configuration"""

    USE_LOGGING: bool = from_env(default=False)
    """Whether to use logging or not, see `microcore.use_logging`"""

    PROMPT_TEMPLATES_PATH: str | Path = from_env("tpl")
    """Path to the folder with prompt templates, ./tpl by default"""

    STORAGE_PATH: str | Path = from_env("storage")
    """Path to the folder with file storage, ./storage by default"""

    # Default file extension for storage operations; empty string means none
    STORAGE_DEFAULT_FILE_EXT: str = from_env(default="")

    EMBEDDING_DB_FOLDER: str = from_env(default="embedding_db")
    """Folder within microcore.config.Config.STORAGE_PATH for storing embeddings"""

    # Optional callable producing embeddings; None uses the package default
    # (presumably — confirm against the embedding-db module)
    EMBEDDING_DB_FUNCTION: Any = from_env()

    EMBEDDING_DB_ALLOW_DUPLICATES: bool = from_env(dtype=bool, default=False)

    DEFAULT_ENCODING: str = from_env("utf-8")
    """Used in file system operations, utf-8 by default"""

    # Jinja2 autoescape setting for template rendering
    JINJA2_AUTO_ESCAPE: bool = from_env(dtype=bool, default=False)

    # API key for the ElevenLabs text-to-speech service
    ELEVENLABS_API_KEY: str = from_env()

    TEXT_TO_SPEECH_PATH: str | Path = from_env()
    """Path to the folder with generated voice files"""

    # NOTE(review): values read from the environment arrive as strings
    # (see BaseConfig._update_from_env) — confirm consumers convert to int
    MAX_CONCURRENT_TASKS: int = from_env(default=None)

    def __post_init__(self):
        """Resolve env-backed fields, then derive the voice-files path default."""
        super().__post_init__()
        if self.TEXT_TO_SPEECH_PATH is None:
            # Default voice output location lives under the storage folder
            self.TEXT_TO_SPEECH_PATH = Path(self.STORAGE_PATH) / "voicing"
def from_env(default=None, dtype=None):
18def from_env(default=None, dtype=None):
19    """
20    Provides default value for the configuration dataclass
21    from the environment variable with the name equal to field name in upper case"""
22    return field(
23        default=_MISSING, metadata=dict(_from_env=True, _default=default, _dtype=dtype)
24    )

Provides a default value for a configuration dataclass field, taken from the environment variable whose name equals the field name in upper case.

def get_bool_from_env(env_var: str, default: bool | None = False) -> bool | None:
27def get_bool_from_env(env_var: str, default: bool | None = False) -> bool | None:
28    """Convert value of environment variable to boolean"""
29    if env_var not in os.environ:
30        return default
31    return os.getenv(env_var, str(default)).upper() in TRUE_VALUES

Convert value of environment variable to boolean

def get_object_from_env(env_var: str, dtype: type, default: Any = None):
34def get_object_from_env(env_var: str, dtype: type, default: Any = None):
35    val_from_env = os.getenv(  # pylint: disable=W1508
36        env_var, _MISSING
37    )
38    if isinstance(val_from_env, str):
39        val_from_env = val_from_env.strip()
40        if val_from_env:
41            try:
42                val_from_env = json.loads(val_from_env.strip())
43                assert isinstance(
44                    val_from_env, dtype
45                ), f"Expected {dtype.__name__}, got {type(val_from_env).__name__}"
46            except (json.JSONDecodeError, AssertionError) as e:
47                raise LLMConfigError(
48                    f"Invalid value in environment variable '{env_var}'. "
49                    f"Expected: JSON {dtype.__name__}, received: '{val_from_env}'"
50                ) from e
51        else:
52            val_from_env = _MISSING
53    if val_from_env is _MISSING:
54        if default is None:  # instead of default factory
55            default = dtype()
56        val_from_env = default
57    return val_from_env
class ApiType:
60class ApiType:
61    """LLM API types"""
62
63    OPEN_AI = "open_ai"
64    AZURE = "azure"
65    """See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models"""
66    ANYSCALE = "anyscale"
67    """See https://www.anyscale.com/endpoints"""
68    DEEP_INFRA = "deep_infra"
69    """List of text generation models: https://deepinfra.com/models?type=text-generation"""
70    ANTHROPIC = "anthropic"
71    GOOGLE_VERTEX_AI = "google_vertex_ai"
72    GOOGLE_AI_STUDIO = "google_ai_studio"
73
74    # Local models
75    FUNCTION = "function"
76    TRANSFORMERS = "transformers"
77    NONE = "none"
78
79    @staticmethod
80    def is_local(api_type: str) -> bool:
81        return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)

LLM API types

OPEN_AI = 'open_ai'
ANYSCALE = 'anyscale'
DEEP_INFRA = 'deep_infra'

List of text generation models: https://deepinfra.com/models?type=text-generation

ANTHROPIC = 'anthropic'
GOOGLE_VERTEX_AI = 'google_vertex_ai'
GOOGLE_AI_STUDIO = 'google_ai_studio'
FUNCTION = 'function'
TRANSFORMERS = 'transformers'
NONE = 'none'
@staticmethod
def is_local(api_type: str) -> bool:
79    @staticmethod
80    def is_local(api_type: str) -> bool:
81        return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
@dataclass
class BaseConfig:
 87@dataclass
 88class BaseConfig:
 89    """Base class for configuration dataclasses"""
 90
 91    USE_DOT_ENV: bool = None
 92    DOT_ENV_FILE: str | Path = None
 93
 94    def __post_init__(self):
 95        self._dot_env_setup()
 96        self._update_from_env()
 97
 98    def _dot_env_setup(self):
 99        global _default_dotenv_loaded
100
101        if self.USE_DOT_ENV is None:
102            self.USE_DOT_ENV = get_bool_from_env("USE_DOT_ENV", True)
103
104        if self.USE_DOT_ENV:
105            if self.DOT_ENV_FILE or not _default_dotenv_loaded:
106                dotenv.load_dotenv(override=True, dotenv_path=self.DOT_ENV_FILE)
107            if not self.DOT_ENV_FILE:
108                _default_dotenv_loaded = True
109
110    def _update_from_env(self):
111        for f in fields(self):
112            if f.metadata.get("_from_env") and getattr(self, f.name) is _MISSING:
113                env_name = f.name.upper()
114                default = f.metadata["_default"]
115                dtype = f.metadata.get("_dtype")
116                if dtype is bool:
117                    val_from_env = get_bool_from_env(env_name, default)
118                elif dtype in [dict, list]:
119                    val_from_env = get_object_from_env(env_name, dtype, default)
120                else:
121                    val_from_env = os.getenv(env_name, default)
122
123                setattr(self, f.name, val_from_env)
124
125    def __iter__(self):
126        for f in fields(self):
127            value = getattr(self, f.name)
128            yield f.name, value

Base class for configuration dataclasses

BaseConfig(USE_DOT_ENV: bool = None, DOT_ENV_FILE: str | pathlib.Path = None)
USE_DOT_ENV: bool = None
DOT_ENV_FILE: str | pathlib.Path = None
@dataclass
class LLMConfig(BaseConfig, _OpenAIEnvVars, _AnthropicEnvVars, _GoogleVertexAiEnvVars):
158@dataclass
159class LLMConfig(BaseConfig, _OpenAIEnvVars, _AnthropicEnvVars, _GoogleVertexAiEnvVars):
160    """LLM configuration"""
161
162    LLM_API_TYPE: str = from_env()
163    """
164    See `ApiType`.
165    To use services that is not listed in `ApiType`,
166    but provides OpenAPI interface, use `ApiType.OPEN_AI`"""
167
168    LLM_API_KEY: str = from_env()
169    LLM_API_BASE: str = from_env()
170    """Base URL for the LLM API, e.g. https://api.openai.com/v1"""
171
172    LLM_API_VERSION: str = from_env()
173    LLM_DEPLOYMENT_ID: str = from_env()
174    """Required by `ApiType.AZURE`"""
175
176    MODEL: str = from_env()
177    """Language model name"""
178
179    TIKTOKEN_ENCODING: str = from_env()
180    """Will enforce using specific encoding for token size measurement"""
181
182    LLM_DEFAULT_ARGS: dict = from_env(dtype=dict)
183    """
184    You may specify here default arguments for the LLM API calls,
185     i. e. temperature, max_tokens, etc.
186     """
187
188    AZURE_DEPLOYMENT_ID: str = from_env()
189
190    INFERENCE_FUNC: Union[Callable, str] = from_env()
191    """Inference function for local models"""
192    CHAT_MODE: bool = from_env(dtype=bool)
193    """Is it a chat or completion model"""
194    INIT_PARAMS: dict = from_env(dtype=dict)
195    """Custom initialization parameters for the model"""
196
197    def __post_init__(self):
198        super().__post_init__()
199        self._init_llm_options()
200        self.validate()
201
202    def uses_local_model(self) -> bool:
203        return ApiType.is_local(self.LLM_API_TYPE)
204
205    def _init_llm_options(self):
206        if self.INFERENCE_FUNC:
207            if not self.LLM_API_TYPE:
208                self.LLM_API_TYPE = ApiType.FUNCTION
209        if self.uses_local_model():
210            return
211
212        # Use defaults from ENV variables expected by OpenAI API
213        self.LLM_API_TYPE = self.LLM_API_TYPE or self.OPENAI_API_TYPE
214
215        if self.LLM_API_TYPE == ApiType.AZURE:
216            self.LLM_API_VERSION = self.LLM_API_VERSION or self.OPENAI_API_VERSION
217            self.LLM_DEPLOYMENT_ID = self.LLM_DEPLOYMENT_ID or self.AZURE_DEPLOYMENT_ID
218        elif self.LLM_API_TYPE == ApiType.GOOGLE_AI_STUDIO:
219            self.MODEL = self.MODEL or "gemini-pro"
220        elif self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI:
221            self.MODEL = self.MODEL or "gemini-1.0-pro"
222            if self.GOOGLE_VERTEX_GCLOUD_AUTH is None:
223                self.GOOGLE_VERTEX_GCLOUD_AUTH = get_bool_from_env(
224                    "GOOGLE_VERTEX_GCLOUD_AUTH", not self.GOOGLE_VERTEX_ACCESS_TOKEN
225                )
226        elif self.LLM_API_TYPE == ApiType.ANYSCALE:
227            self.LLM_API_BASE = (
228                self.LLM_API_BASE or "https://api.endpoints.anyscale.com/v1"
229            )
230            self.MODEL = self.MODEL or "meta-llama/Llama-2-70b-chat-hf"
231        elif self.LLM_API_TYPE == ApiType.DEEP_INFRA:
232            self.LLM_API_BASE = (
233                self.LLM_API_BASE or "https://api.deepinfra.com/v1/openai"
234            )
235            self.MODEL = self.MODEL or "meta-llama/Llama-2-70b-chat-hf"
236        elif self.LLM_API_TYPE == ApiType.ANTHROPIC:
237            self.LLM_API_BASE = self.LLM_API_BASE or "https://api.anthropic.com/"
238            self.MODEL = self.MODEL or "claude-3-opus-20240229"
239            self.LLM_API_KEY = self.LLM_API_KEY or self.ANTHROPIC_API_KEY
240        else:
241            self.LLM_API_BASE = self.LLM_API_BASE or self.OPENAI_API_BASE
242            self.LLM_API_KEY = self.LLM_API_KEY or self.OPENAI_API_KEY
243            self.LLM_API_VERSION = self.LLM_API_VERSION or self.OPENAI_API_VERSION
244            self.MODEL = self.MODEL or "gpt-3.5-turbo"
245
246    def _validate_local_llm(self):
247        if self.CHAT_MODE is None:
248            logging.warning(
249                "When using local models, "
250                "(bool)CHAT_MODE configuration option should be explicitly set"
251            )
252        if self.LLM_API_TYPE == ApiType.FUNCTION:
253            if not self.INFERENCE_FUNC:
254                raise LLMConfigError(
255                    "LLM configuration error: "
256                    "INFERENCE_FUNC should be provided for local models"
257                )
258        elif self.LLM_API_TYPE == ApiType.TRANSFORMERS:
259            if not self.MODEL:
260                raise LLMConfigError(
261                    "LLM configuration error: "
262                    "MODEL should be provided for local transformers models"
263                )
264
265    def validate(self):
266        """
267        Validate LLM configuration
268
269        Raises:
270            LLMConfigError
271        """
272        if self.LLM_API_TYPE == ApiType.NONE:
273            return
274        if self.uses_local_model():
275            self._validate_local_llm()
276            return
277        if self.INFERENCE_FUNC:
278            raise LLMConfigError(
279                "LLM configuration error: INFERENCE_FUNC should be provided only for local models"
280            )
281        if self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI:
282            if (
283                not self.GOOGLE_VERTEX_ACCESS_TOKEN
284                and not self.GOOGLE_VERTEX_GCLOUD_AUTH
285            ):
286                raise LLMConfigError(
287                    "LLM configuration error: "
288                    "GOOGLE_VERTEX_ACCESS_TOKEN should be provided "
289                    "or GOOGLE_VERTEX_GCLOUD_AUTH should be enabled"
290                )
291        else:
292            if not self.LLM_API_KEY:
293                raise LLMConfigError("LLM configuration error: LLM_API_KEY is absent")
294            if self.LLM_API_TYPE == ApiType.AZURE:
295                if not self.LLM_API_BASE:
296                    raise LLMConfigError(
297                        "LLM configuration error: "
298                        "LLM_API_BASE is required for using Azure models"
299                    )
300                if not self.LLM_DEPLOYMENT_ID:
301                    raise LLMConfigError(
302                        "LLM configuration error: "
303                        "LLM_DEPLOYMENT_ID is required for using Azure models"
304                    )
305                if not self.LLM_API_VERSION:
306                    raise LLMConfigError(
307                        "LLM configuration error: "
308                        "LLM_API_VERSION is required for using Azure models"
309                    )
310
311    def describe(self, return_dict=False):
312        """
313        Informal description of the configuration
314        """
315        prev_env = os.environ.copy()
316        os.environ.clear()
317        default = Config(LLM_API_TYPE=ApiType.NONE, USE_DOT_ENV=False)
318        os.environ.update(prev_env)
319        data = {
320            k.lower().replace("llm_", ""): v
321            for k, v in dict(self).items()
322            if v is not None and v != getattr(default, k) and k != "USE_DOT_ENV"
323        }
324        for k, v in data.items():
325            if "_key" in k and isinstance(v, str):
326                if len(v) <= 3:
327                    continue
328                data[k] = v[: 1 if len(v) <= 12 else 3] + "****" + v[-2:]
329        if return_dict:
330            return data
331
332        print("Config:")
333        for k, v in data.items():
334            print(f"  {k}: {Fore.GREEN}{v}{Fore.RESET}")
335        return None

LLM configuration

LLMConfig( GOOGLE_VERTEX_ACCESS_TOKEN: str = <object object>, GOOGLE_VERTEX_PROJECT_ID: str = <object object>, GOOGLE_VERTEX_LOCATION: str = <object object>, GOOGLE_VERTEX_GCLOUD_AUTH: bool = <object object>, GOOGLE_VERTEX_RESPONSE_VALIDATION: bool = <object object>, GOOGLE_GEMINI_SAFETY_SETTINGS: dict = <object object>, ANTHROPIC_API_KEY: str = <object object>, OPENAI_API_TYPE: str = <object object>, OPENAI_API_KEY: str = <object object>, OPENAI_API_BASE: str = <object object>, OPENAI_API_VERSION: str = <object object>, USE_DOT_ENV: bool = None, DOT_ENV_FILE: str | pathlib.Path = None, LLM_API_TYPE: str = <object object>, LLM_API_KEY: str = <object object>, LLM_API_BASE: str = <object object>, LLM_API_VERSION: str = <object object>, LLM_DEPLOYMENT_ID: str = <object object>, MODEL: str = <object object>, TIKTOKEN_ENCODING: str = <object object>, LLM_DEFAULT_ARGS: dict = <object object>, AZURE_DEPLOYMENT_ID: str = <object object>, INFERENCE_FUNC: Union[Callable, str] = <object object>, CHAT_MODE: bool = <object object>, INIT_PARAMS: dict = <object object>)
LLM_API_TYPE: str = <object object>

See ApiType. To use services that are not listed in ApiType but provide an OpenAI-compatible interface, use ApiType.OPEN_AI

LLM_API_KEY: str = <object object>
LLM_API_BASE: str = <object object>

Base URL for the LLM API, e.g. https://api.openai.com/v1

LLM_API_VERSION: str = <object object>
LLM_DEPLOYMENT_ID: str = <object object>

Required by ApiType.AZURE

MODEL: str = <object object>

Language model name

TIKTOKEN_ENCODING: str = <object object>

Will enforce using specific encoding for token size measurement

LLM_DEFAULT_ARGS: dict = <object object>

You may specify here default arguments for the LLM API calls, i.e. temperature, max_tokens, etc.

AZURE_DEPLOYMENT_ID: str = <object object>
INFERENCE_FUNC: Union[Callable, str] = <object object>

Inference function for local models

CHAT_MODE: bool = <object object>

Is it a chat or completion model

INIT_PARAMS: dict = <object object>

Custom initialization parameters for the model

def uses_local_model(self) -> bool:
202    def uses_local_model(self) -> bool:
203        return ApiType.is_local(self.LLM_API_TYPE)
def validate(self):
265    def validate(self):
266        """
267        Validate LLM configuration
268
269        Raises:
270            LLMConfigError
271        """
272        if self.LLM_API_TYPE == ApiType.NONE:
273            return
274        if self.uses_local_model():
275            self._validate_local_llm()
276            return
277        if self.INFERENCE_FUNC:
278            raise LLMConfigError(
279                "LLM configuration error: INFERENCE_FUNC should be provided only for local models"
280            )
281        if self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI:
282            if (
283                not self.GOOGLE_VERTEX_ACCESS_TOKEN
284                and not self.GOOGLE_VERTEX_GCLOUD_AUTH
285            ):
286                raise LLMConfigError(
287                    "LLM configuration error: "
288                    "GOOGLE_VERTEX_ACCESS_TOKEN should be provided "
289                    "or GOOGLE_VERTEX_GCLOUD_AUTH should be enabled"
290                )
291        else:
292            if not self.LLM_API_KEY:
293                raise LLMConfigError("LLM configuration error: LLM_API_KEY is absent")
294            if self.LLM_API_TYPE == ApiType.AZURE:
295                if not self.LLM_API_BASE:
296                    raise LLMConfigError(
297                        "LLM configuration error: "
298                        "LLM_API_BASE is required for using Azure models"
299                    )
300                if not self.LLM_DEPLOYMENT_ID:
301                    raise LLMConfigError(
302                        "LLM configuration error: "
303                        "LLM_DEPLOYMENT_ID is required for using Azure models"
304                    )
305                if not self.LLM_API_VERSION:
306                    raise LLMConfigError(
307                        "LLM configuration error: "
308                        "LLM_API_VERSION is required for using Azure models"
309                    )

Validate LLM configuration

Raises:
  • LLMConfigError
def describe(self, return_dict=False):
311    def describe(self, return_dict=False):
312        """
313        Informal description of the configuration
314        """
315        prev_env = os.environ.copy()
316        os.environ.clear()
317        default = Config(LLM_API_TYPE=ApiType.NONE, USE_DOT_ENV=False)
318        os.environ.update(prev_env)
319        data = {
320            k.lower().replace("llm_", ""): v
321            for k, v in dict(self).items()
322            if v is not None and v != getattr(default, k) and k != "USE_DOT_ENV"
323        }
324        for k, v in data.items():
325            if "_key" in k and isinstance(v, str):
326                if len(v) <= 3:
327                    continue
328                data[k] = v[: 1 if len(v) <= 12 else 3] + "****" + v[-2:]
329        if return_dict:
330            return data
331
332        print("Config:")
333        for k, v in data.items():
334            print(f"  {k}: {Fore.GREEN}{v}{Fore.RESET}")
335        return None

Informal description of the configuration

class LLMConfigError(ValueError):
    """Signals an invalid or incomplete LLM configuration."""

LLM configuration error

Inherited Members
builtins.ValueError
ValueError
builtins.BaseException
with_traceback
add_note
args
@dataclass
class Config(LLMConfig):
    """MicroCore configuration"""

    USE_LOGGING: bool = from_env(default=False)
    """Whether to use logging or not, see `microcore.use_logging`"""

    PROMPT_TEMPLATES_PATH: str | Path = from_env("tpl")
    """Path to the folder with prompt templates, ./tpl by default"""

    STORAGE_PATH: str | Path = from_env("storage")
    """Path to the folder with file storage, ./storage by default"""

    # Default file extension for stored files; empty string means no extension is added
    STORAGE_DEFAULT_FILE_EXT: str = from_env(default="")

    EMBEDDING_DB_FOLDER: str = from_env(default="embedding_db")
    """Folder within microcore.config.Config.STORAGE_PATH for storing embeddings"""

    # Embedding function used by the embeddings DB; exact contract depends on
    # the backend — TODO confirm against the embedding_db implementation
    EMBEDDING_DB_FUNCTION: Any = from_env()

    # Whether duplicate records may be stored in the embeddings DB
    EMBEDDING_DB_ALLOW_DUPLICATES: bool = from_env(dtype=bool, default=False)

    DEFAULT_ENCODING: str = from_env("utf-8")
    """Used in file system operations, utf-8 by default"""

    # Enables Jinja2 autoescaping when rendering prompt templates
    JINJA2_AUTO_ESCAPE: bool = from_env(dtype=bool, default=False)

    # API key for the ElevenLabs text-to-speech service
    ELEVENLABS_API_KEY: str = from_env()

    TEXT_TO_SPEECH_PATH: str | Path = from_env()
    """Path to the folder with generated voice files"""

    # Limit on concurrently running tasks; presumably None means unlimited —
    # verify against the consumers of this setting
    MAX_CONCURRENT_TASKS: int = from_env(default=None)

    def __post_init__(self):
        """Run parent initialization, then fill in derived defaults."""
        super().__post_init__()
        # Voice files default to a "voicing" subfolder of the storage path
        if self.TEXT_TO_SPEECH_PATH is None:
            self.TEXT_TO_SPEECH_PATH = Path(self.STORAGE_PATH) / "voicing"

MicroCore configuration

Config( GOOGLE_VERTEX_ACCESS_TOKEN: str = <object object>, GOOGLE_VERTEX_PROJECT_ID: str = <object object>, GOOGLE_VERTEX_LOCATION: str = <object object>, GOOGLE_VERTEX_GCLOUD_AUTH: bool = <object object>, GOOGLE_VERTEX_RESPONSE_VALIDATION: bool = <object object>, GOOGLE_GEMINI_SAFETY_SETTINGS: dict = <object object>, ANTHROPIC_API_KEY: str = <object object>, OPENAI_API_TYPE: str = <object object>, OPENAI_API_KEY: str = <object object>, OPENAI_API_BASE: str = <object object>, OPENAI_API_VERSION: str = <object object>, USE_DOT_ENV: bool = None, DOT_ENV_FILE: str | pathlib.Path = None, LLM_API_TYPE: str = <object object>, LLM_API_KEY: str = <object object>, LLM_API_BASE: str = <object object>, LLM_API_VERSION: str = <object object>, LLM_DEPLOYMENT_ID: str = <object object>, MODEL: str = <object object>, TIKTOKEN_ENCODING: str = <object object>, LLM_DEFAULT_ARGS: dict = <object object>, AZURE_DEPLOYMENT_ID: str = <object object>, INFERENCE_FUNC: Union[Callable, str] = <object object>, CHAT_MODE: bool = <object object>, INIT_PARAMS: dict = <object object>, USE_LOGGING: bool = <object object>, PROMPT_TEMPLATES_PATH: str | pathlib.Path = <object object>, STORAGE_PATH: str | pathlib.Path = <object object>, STORAGE_DEFAULT_FILE_EXT: str = <object object>, EMBEDDING_DB_FOLDER: str = <object object>, EMBEDDING_DB_FUNCTION: Any = <object object>, EMBEDDING_DB_ALLOW_DUPLICATES: bool = <object object>, DEFAULT_ENCODING: str = <object object>, JINJA2_AUTO_ESCAPE: bool = <object object>, ELEVENLABS_API_KEY: str = <object object>, TEXT_TO_SPEECH_PATH: str | pathlib.Path = <object object>, MAX_CONCURRENT_TASKS: int = <object object>)
USE_LOGGING: bool = <object object>

Whether to use logging or not, see microcore.use_logging

PROMPT_TEMPLATES_PATH: str | pathlib.Path = <object object>

Path to the folder with prompt templates, ./tpl by default

STORAGE_PATH: str | pathlib.Path = <object object>

Path to the folder with file storage, ./storage by default

STORAGE_DEFAULT_FILE_EXT: str = <object object>
EMBEDDING_DB_FOLDER: str = <object object>

Folder within Config.STORAGE_PATH for storing embeddings

EMBEDDING_DB_FUNCTION: Any = <object object>
EMBEDDING_DB_ALLOW_DUPLICATES: bool = <object object>
DEFAULT_ENCODING: str = <object object>

Used in file system operations, utf-8 by default

JINJA2_AUTO_ESCAPE: bool = <object object>
ELEVENLABS_API_KEY: str = <object object>
TEXT_TO_SPEECH_PATH: str | pathlib.Path = <object object>

Path to the folder with generated voice files

MAX_CONCURRENT_TASKS: int = <object object>