microcore.configuration
import json
import logging
import os
from dataclasses import dataclass, field, fields
from pathlib import Path
from typing import Any, Union, Callable

import dotenv
from colorama import Fore

# Sentinel distinguishing "no value provided" from an explicit None.
_MISSING = object()

TRUE_VALUES = ["1", "TRUE", "YES", "ON", "ENABLED", "Y", "+"]
"""@private"""


def from_env(default=None, dtype=None):
    """
    Provides default value for the configuration dataclass
    from the environment variable with the name equal to field name in upper case.

    Args:
        default: value used when the environment variable is absent.
        dtype: expected type; `bool`, `dict` and `list` get special parsing
            in `BaseConfig._update_from_env`.
    """
    return field(
        default=_MISSING, metadata=dict(_from_env=True, _default=default, _dtype=dtype)
    )


def get_bool_from_env(env_var: str, default: bool | None = False) -> bool | None:
    """Convert value of environment variable to boolean"""
    if env_var not in os.environ:
        return default
    # Membership was checked above, so direct indexing cannot raise.
    return os.environ[env_var].upper() in TRUE_VALUES


def get_object_from_env(env_var: str, dtype: type, default: Any = None):
    """
    Read a JSON-encoded object of type `dtype` (dict or list) from an
    environment variable.

    Falls back to `default` (or an empty `dtype()` when default is None)
    if the variable is absent or blank.

    Raises:
        LLMConfigError: if the value is not valid JSON of the expected type.
    """
    val_from_env = os.getenv(  # pylint: disable=W1508
        env_var, _MISSING
    )
    if isinstance(val_from_env, str):
        val_from_env = val_from_env.strip()
        if val_from_env:
            try:
                val_from_env = json.loads(val_from_env)
                # Explicit check instead of `assert`:
                # assertions are stripped when running under `python -O`.
                if not isinstance(val_from_env, dtype):
                    raise LLMConfigError(
                        f"Invalid value in environment variable '{env_var}'. "
                        f"Expected: JSON {dtype.__name__}, received: '{val_from_env}'"
                    )
            except json.JSONDecodeError as e:
                raise LLMConfigError(
                    f"Invalid value in environment variable '{env_var}'. "
                    f"Expected: JSON {dtype.__name__}, received: '{val_from_env}'"
                ) from e
        else:
            val_from_env = _MISSING
    if val_from_env is _MISSING:
        if default is None:  # instead of default factory
            default = dtype()
        val_from_env = default
    return val_from_env


class ApiType:
    """LLM API types"""

    OPEN_AI = "open_ai"
    AZURE = "azure"
    """See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models"""
    ANYSCALE = "anyscale"
    """See https://www.anyscale.com/endpoints"""
    DEEP_INFRA = "deep_infra"
    """List of text generation models: https://deepinfra.com/models?type=text-generation"""
    ANTHROPIC = "anthropic"
    GOOGLE_VERTEX_AI = "google_vertex_ai"
    GOOGLE_AI_STUDIO = "google_ai_studio"

    # Local models
    FUNCTION = "function"
    TRANSFORMERS = "transformers"
    NONE = "none"

    @staticmethod
    def is_local(api_type: str) -> bool:
        """True for API types served in-process rather than over HTTP."""
        return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)


# Guards against re-loading the default .env file on every config construction.
_default_dotenv_loaded = False


@dataclass
class BaseConfig:
    """Base class for configuration dataclasses"""

    # None means "resolve from the USE_DOT_ENV environment variable".
    USE_DOT_ENV: bool | None = None
    # Explicit path to a .env file; when None, python-dotenv's default lookup is used.
    DOT_ENV_FILE: str | Path | None = None

    def __post_init__(self):
        self._dot_env_setup()
        self._update_from_env()

    def _dot_env_setup(self):
        """Load variables from a .env file into os.environ (once for the default file)."""
        global _default_dotenv_loaded

        if self.USE_DOT_ENV is None:
            self.USE_DOT_ENV = get_bool_from_env("USE_DOT_ENV", True)

        if self.USE_DOT_ENV:
            # The default .env file is loaded only once per process;
            # an explicitly requested file is always (re)loaded.
            if self.DOT_ENV_FILE or not _default_dotenv_loaded:
                dotenv.load_dotenv(override=True, dotenv_path=self.DOT_ENV_FILE)
                if not self.DOT_ENV_FILE:
                    _default_dotenv_loaded = True

    def _update_from_env(self):
        """Resolve fields declared with `from_env()` that were not set explicitly."""
        for f in fields(self):
            if f.metadata.get("_from_env") and getattr(self, f.name) is _MISSING:
                env_name = f.name.upper()
                default = f.metadata["_default"]
                dtype = f.metadata.get("_dtype")
                if dtype is bool:
                    val_from_env = get_bool_from_env(env_name, default)
                elif dtype in (dict, list):
                    val_from_env = get_object_from_env(env_name, dtype, default)
                else:
                    # NOTE(review): any other dtype (e.g. int) arrives as a raw
                    # string from the environment — confirm callers expect that
                    # (see MAX_CONCURRENT_TASKS).
                    val_from_env = os.getenv(env_name, default)

                setattr(self, f.name, val_from_env)

    def __iter__(self):
        """Yield (field_name, value) pairs, enabling `dict(config)`."""
        for f in fields(self):
            value = getattr(self, f.name)
            yield f.name, value


@dataclass
class _OpenAIEnvVars:
    # OS Environment variables expected by OpenAI library
    # Will be used as defaults for LLM
    # @todo: implement lib_defaults to take default values from openai lib if available
    OPENAI_API_TYPE: str = from_env(ApiType.OPEN_AI)
    OPENAI_API_KEY: str = from_env()
    OPENAI_API_BASE: str = from_env("https://api.openai.com/v1")
    OPENAI_API_VERSION: str = from_env()


@dataclass
class _AnthropicEnvVars:
    ANTHROPIC_API_KEY: str = from_env()


@dataclass
class _GoogleVertexAiEnvVars:
    GOOGLE_VERTEX_ACCESS_TOKEN: str = from_env()
    GOOGLE_VERTEX_PROJECT_ID: str = from_env()
    GOOGLE_VERTEX_LOCATION: str = from_env()
    GOOGLE_VERTEX_GCLOUD_AUTH: bool = from_env(dtype=bool)

    GOOGLE_VERTEX_RESPONSE_VALIDATION: bool = from_env(dtype=bool, default=False)
    GOOGLE_GEMINI_SAFETY_SETTINGS: dict = from_env(dtype=dict)


@dataclass
class LLMConfig(BaseConfig, _OpenAIEnvVars, _AnthropicEnvVars, _GoogleVertexAiEnvVars):
    """LLM configuration"""

    LLM_API_TYPE: str = from_env()
    """
    See `ApiType`.
    To use services that are not listed in `ApiType`
    but provide an OpenAI-compatible API, use `ApiType.OPEN_AI`"""

    LLM_API_KEY: str = from_env()
    LLM_API_BASE: str = from_env()
    """Base URL for the LLM API, e.g. https://api.openai.com/v1"""

    LLM_API_VERSION: str = from_env()
    LLM_DEPLOYMENT_ID: str = from_env()
    """Required by `ApiType.AZURE`"""

    MODEL: str = from_env()
    """Language model name"""

    TIKTOKEN_ENCODING: str = from_env()
    """Will enforce using specific encoding for token size measurement"""

    LLM_DEFAULT_ARGS: dict = from_env(dtype=dict)
    """
    You may specify here default arguments for the LLM API calls,
    e.g. temperature, max_tokens, etc.
    """

    AZURE_DEPLOYMENT_ID: str = from_env()

    INFERENCE_FUNC: Union[Callable, str] = from_env()
    """Inference function for local models"""
    CHAT_MODE: bool = from_env(dtype=bool)
    """Is it a chat or completion model"""
    INIT_PARAMS: dict = from_env(dtype=dict)
    """Custom initialization parameters for the model"""

    def __post_init__(self):
        super().__post_init__()
        self._init_llm_options()
        self.validate()

    def uses_local_model(self) -> bool:
        """True when the configured API type runs in-process (see `ApiType.is_local`)."""
        return ApiType.is_local(self.LLM_API_TYPE)

    def _init_llm_options(self):
        """Fill unset options with per-API-type defaults."""
        if self.INFERENCE_FUNC:
            if not self.LLM_API_TYPE:
                self.LLM_API_TYPE = ApiType.FUNCTION
        if self.uses_local_model():
            return

        # Use defaults from ENV variables expected by OpenAI API
        self.LLM_API_TYPE = self.LLM_API_TYPE or self.OPENAI_API_TYPE

        if self.LLM_API_TYPE == ApiType.AZURE:
            self.LLM_API_VERSION = self.LLM_API_VERSION or self.OPENAI_API_VERSION
            self.LLM_DEPLOYMENT_ID = self.LLM_DEPLOYMENT_ID or self.AZURE_DEPLOYMENT_ID
        elif self.LLM_API_TYPE == ApiType.GOOGLE_AI_STUDIO:
            self.MODEL = self.MODEL or "gemini-pro"
        elif self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI:
            self.MODEL = self.MODEL or "gemini-1.0-pro"
            if self.GOOGLE_VERTEX_GCLOUD_AUTH is None:
                # Default to gcloud auth only when no access token is provided.
                self.GOOGLE_VERTEX_GCLOUD_AUTH = get_bool_from_env(
                    "GOOGLE_VERTEX_GCLOUD_AUTH", not self.GOOGLE_VERTEX_ACCESS_TOKEN
                )
        elif self.LLM_API_TYPE == ApiType.ANYSCALE:
            self.LLM_API_BASE = (
                self.LLM_API_BASE or "https://api.endpoints.anyscale.com/v1"
            )
            self.MODEL = self.MODEL or "meta-llama/Llama-2-70b-chat-hf"
        elif self.LLM_API_TYPE == ApiType.DEEP_INFRA:
            self.LLM_API_BASE = (
                self.LLM_API_BASE or "https://api.deepinfra.com/v1/openai"
            )
            self.MODEL = self.MODEL or "meta-llama/Llama-2-70b-chat-hf"
        elif self.LLM_API_TYPE == ApiType.ANTHROPIC:
            self.LLM_API_BASE = self.LLM_API_BASE or "https://api.anthropic.com/"
            self.MODEL = self.MODEL or "claude-3-opus-20240229"
            self.LLM_API_KEY = self.LLM_API_KEY or self.ANTHROPIC_API_KEY
        else:
            self.LLM_API_BASE = self.LLM_API_BASE or self.OPENAI_API_BASE
            self.LLM_API_KEY = self.LLM_API_KEY or self.OPENAI_API_KEY
            self.LLM_API_VERSION = self.LLM_API_VERSION or self.OPENAI_API_VERSION
            self.MODEL = self.MODEL or "gpt-3.5-turbo"

    def _validate_local_llm(self):
        """Validation rules specific to in-process (local) model API types."""
        if self.CHAT_MODE is None:
            logging.warning(
                "When using local models, "
                "(bool)CHAT_MODE configuration option should be explicitly set"
            )
        if self.LLM_API_TYPE == ApiType.FUNCTION:
            if not self.INFERENCE_FUNC:
                raise LLMConfigError(
                    "LLM configuration error: "
                    "INFERENCE_FUNC should be provided for local models"
                )
        elif self.LLM_API_TYPE == ApiType.TRANSFORMERS:
            if not self.MODEL:
                raise LLMConfigError(
                    "LLM configuration error: "
                    "MODEL should be provided for local transformers models"
                )

    def validate(self):
        """
        Validate LLM configuration

        Raises:
            LLMConfigError
        """
        if self.LLM_API_TYPE == ApiType.NONE:
            return
        if self.uses_local_model():
            self._validate_local_llm()
            return
        if self.INFERENCE_FUNC:
            raise LLMConfigError(
                "LLM configuration error: INFERENCE_FUNC should be provided only for local models"
            )
        if self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI:
            if (
                not self.GOOGLE_VERTEX_ACCESS_TOKEN
                and not self.GOOGLE_VERTEX_GCLOUD_AUTH
            ):
                raise LLMConfigError(
                    "LLM configuration error: "
                    "GOOGLE_VERTEX_ACCESS_TOKEN should be provided "
                    "or GOOGLE_VERTEX_GCLOUD_AUTH should be enabled"
                )
        else:
            if not self.LLM_API_KEY:
                raise LLMConfigError("LLM configuration error: LLM_API_KEY is absent")
        if self.LLM_API_TYPE == ApiType.AZURE:
            if not self.LLM_API_BASE:
                raise LLMConfigError(
                    "LLM configuration error: "
                    "LLM_API_BASE is required for using Azure models"
                )
            if not self.LLM_DEPLOYMENT_ID:
                raise LLMConfigError(
                    "LLM configuration error: "
                    "LLM_DEPLOYMENT_ID is required for using Azure models"
                )
            if not self.LLM_API_VERSION:
                raise LLMConfigError(
                    "LLM configuration error: "
                    "LLM_API_VERSION is required for using Azure models"
                )

    def describe(self, return_dict=False):
        """
        Informal description of the configuration

        Prints (or returns, when `return_dict` is True) only the options that
        differ from a pristine default config; API keys are masked.
        """
        # Build a reference config with the environment hidden so that only
        # explicitly configured options are reported.
        prev_env = os.environ.copy()
        os.environ.clear()
        try:
            default = Config(LLM_API_TYPE=ApiType.NONE, USE_DOT_ENV=False)
        finally:
            # Restore the environment even if Config construction raises.
            os.environ.update(prev_env)
        data = {
            k.lower().replace("llm_", ""): v
            for k, v in dict(self).items()
            if v is not None and v != getattr(default, k) and k != "USE_DOT_ENV"
        }
        for k, v in data.items():
            if "_key" in k and isinstance(v, str):
                if len(v) <= 3:
                    continue
                # Short keys keep 1 leading char, longer ones keep 3.
                data[k] = v[: 1 if len(v) <= 12 else 3] + "****" + v[-2:]
        if return_dict:
            return data

        print("Config:")
        for k, v in data.items():
            print(f"    {k}: {Fore.GREEN}{v}{Fore.RESET}")
        return None


class LLMConfigError(ValueError):
    """LLM configuration error"""


@dataclass
class Config(LLMConfig):
    """MicroCore configuration"""

    USE_LOGGING: bool = from_env(default=False)
    """Whether to use logging or not, see `microcore.use_logging`"""

    PROMPT_TEMPLATES_PATH: str | Path = from_env("tpl")
    """Path to the folder with prompt templates, ./tpl by default"""

    STORAGE_PATH: str | Path = from_env("storage")
    """Path to the folder with file storage, ./storage by default"""

    STORAGE_DEFAULT_FILE_EXT: str = from_env(default="")

    EMBEDDING_DB_FOLDER: str = from_env(default="embedding_db")
    """Folder within microcore.config.Config.STORAGE_PATH for storing embeddings"""

    EMBEDDING_DB_FUNCTION: Any = from_env()

    EMBEDDING_DB_ALLOW_DUPLICATES: bool = from_env(dtype=bool, default=False)

    DEFAULT_ENCODING: str = from_env("utf-8")
    """Used in file system operations, utf-8 by default"""

    JINJA2_AUTO_ESCAPE: bool = from_env(dtype=bool, default=False)

    ELEVENLABS_API_KEY: str = from_env()

    TEXT_TO_SPEECH_PATH: str | Path = from_env()
    """Path to the folder with generated voice files"""

    MAX_CONCURRENT_TASKS: int = from_env(default=None)

    def __post_init__(self):
        super().__post_init__()
        if self.TEXT_TO_SPEECH_PATH is None:
            self.TEXT_TO_SPEECH_PATH = Path(self.STORAGE_PATH) / "voicing"
18def from_env(default=None, dtype=None): 19 """ 20 Provides default value for the configuration dataclass 21 from the environment variable with the name equal to field name in upper case""" 22 return field( 23 default=_MISSING, metadata=dict(_from_env=True, _default=default, _dtype=dtype) 24 )
Provides default value for the configuration dataclass from the environment variable with the name equal to field name in upper case
27def get_bool_from_env(env_var: str, default: bool | None = False) -> bool | None: 28 """Convert value of environment variable to boolean""" 29 if env_var not in os.environ: 30 return default 31 return os.getenv(env_var, str(default)).upper() in TRUE_VALUES
Convert value of environment variable to boolean
34def get_object_from_env(env_var: str, dtype: type, default: Any = None): 35 val_from_env = os.getenv( # pylint: disable=W1508 36 env_var, _MISSING 37 ) 38 if isinstance(val_from_env, str): 39 val_from_env = val_from_env.strip() 40 if val_from_env: 41 try: 42 val_from_env = json.loads(val_from_env.strip()) 43 assert isinstance( 44 val_from_env, dtype 45 ), f"Expected {dtype.__name__}, got {type(val_from_env).__name__}" 46 except (json.JSONDecodeError, AssertionError) as e: 47 raise LLMConfigError( 48 f"Invalid value in environment variable '{env_var}'. " 49 f"Expected: JSON {dtype.__name__}, received: '{val_from_env}'" 50 ) from e 51 else: 52 val_from_env = _MISSING 53 if val_from_env is _MISSING: 54 if default is None: # instead of default factory 55 default = dtype() 56 val_from_env = default 57 return val_from_env
60class ApiType: 61 """LLM API types""" 62 63 OPEN_AI = "open_ai" 64 AZURE = "azure" 65 """See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models""" 66 ANYSCALE = "anyscale" 67 """See https://www.anyscale.com/endpoints""" 68 DEEP_INFRA = "deep_infra" 69 """List of text generation models: https://deepinfra.com/models?type=text-generation""" 70 ANTHROPIC = "anthropic" 71 GOOGLE_VERTEX_AI = "google_vertex_ai" 72 GOOGLE_AI_STUDIO = "google_ai_studio" 73 74 # Local models 75 FUNCTION = "function" 76 TRANSFORMERS = "transformers" 77 NONE = "none" 78 79 @staticmethod 80 def is_local(api_type: str) -> bool: 81 return api_type in (ApiType.FUNCTION, ApiType.TRANSFORMERS, ApiType.NONE)
LLM API types
List of text generation models: https://deepinfra.com/models?type=text-generation
87@dataclass 88class BaseConfig: 89 """Base class for configuration dataclasses""" 90 91 USE_DOT_ENV: bool = None 92 DOT_ENV_FILE: str | Path = None 93 94 def __post_init__(self): 95 self._dot_env_setup() 96 self._update_from_env() 97 98 def _dot_env_setup(self): 99 global _default_dotenv_loaded 100 101 if self.USE_DOT_ENV is None: 102 self.USE_DOT_ENV = get_bool_from_env("USE_DOT_ENV", True) 103 104 if self.USE_DOT_ENV: 105 if self.DOT_ENV_FILE or not _default_dotenv_loaded: 106 dotenv.load_dotenv(override=True, dotenv_path=self.DOT_ENV_FILE) 107 if not self.DOT_ENV_FILE: 108 _default_dotenv_loaded = True 109 110 def _update_from_env(self): 111 for f in fields(self): 112 if f.metadata.get("_from_env") and getattr(self, f.name) is _MISSING: 113 env_name = f.name.upper() 114 default = f.metadata["_default"] 115 dtype = f.metadata.get("_dtype") 116 if dtype is bool: 117 val_from_env = get_bool_from_env(env_name, default) 118 elif dtype in [dict, list]: 119 val_from_env = get_object_from_env(env_name, dtype, default) 120 else: 121 val_from_env = os.getenv(env_name, default) 122 123 setattr(self, f.name, val_from_env) 124 125 def __iter__(self): 126 for f in fields(self): 127 value = getattr(self, f.name) 128 yield f.name, value
Base class for configuration dataclasses
158@dataclass 159class LLMConfig(BaseConfig, _OpenAIEnvVars, _AnthropicEnvVars, _GoogleVertexAiEnvVars): 160 """LLM configuration""" 161 162 LLM_API_TYPE: str = from_env() 163 """ 164 See `ApiType`. 165 To use services that is not listed in `ApiType`, 166 but provides OpenAPI interface, use `ApiType.OPEN_AI`""" 167 168 LLM_API_KEY: str = from_env() 169 LLM_API_BASE: str = from_env() 170 """Base URL for the LLM API, e.g. https://api.openai.com/v1""" 171 172 LLM_API_VERSION: str = from_env() 173 LLM_DEPLOYMENT_ID: str = from_env() 174 """Required by `ApiType.AZURE`""" 175 176 MODEL: str = from_env() 177 """Language model name""" 178 179 TIKTOKEN_ENCODING: str = from_env() 180 """Will enforce using specific encoding for token size measurement""" 181 182 LLM_DEFAULT_ARGS: dict = from_env(dtype=dict) 183 """ 184 You may specify here default arguments for the LLM API calls, 185 i. e. temperature, max_tokens, etc. 186 """ 187 188 AZURE_DEPLOYMENT_ID: str = from_env() 189 190 INFERENCE_FUNC: Union[Callable, str] = from_env() 191 """Inference function for local models""" 192 CHAT_MODE: bool = from_env(dtype=bool) 193 """Is it a chat or completion model""" 194 INIT_PARAMS: dict = from_env(dtype=dict) 195 """Custom initialization parameters for the model""" 196 197 def __post_init__(self): 198 super().__post_init__() 199 self._init_llm_options() 200 self.validate() 201 202 def uses_local_model(self) -> bool: 203 return ApiType.is_local(self.LLM_API_TYPE) 204 205 def _init_llm_options(self): 206 if self.INFERENCE_FUNC: 207 if not self.LLM_API_TYPE: 208 self.LLM_API_TYPE = ApiType.FUNCTION 209 if self.uses_local_model(): 210 return 211 212 # Use defaults from ENV variables expected by OpenAI API 213 self.LLM_API_TYPE = self.LLM_API_TYPE or self.OPENAI_API_TYPE 214 215 if self.LLM_API_TYPE == ApiType.AZURE: 216 self.LLM_API_VERSION = self.LLM_API_VERSION or self.OPENAI_API_VERSION 217 self.LLM_DEPLOYMENT_ID = self.LLM_DEPLOYMENT_ID or self.AZURE_DEPLOYMENT_ID 218 elif 
self.LLM_API_TYPE == ApiType.GOOGLE_AI_STUDIO: 219 self.MODEL = self.MODEL or "gemini-pro" 220 elif self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI: 221 self.MODEL = self.MODEL or "gemini-1.0-pro" 222 if self.GOOGLE_VERTEX_GCLOUD_AUTH is None: 223 self.GOOGLE_VERTEX_GCLOUD_AUTH = get_bool_from_env( 224 "GOOGLE_VERTEX_GCLOUD_AUTH", not self.GOOGLE_VERTEX_ACCESS_TOKEN 225 ) 226 elif self.LLM_API_TYPE == ApiType.ANYSCALE: 227 self.LLM_API_BASE = ( 228 self.LLM_API_BASE or "https://api.endpoints.anyscale.com/v1" 229 ) 230 self.MODEL = self.MODEL or "meta-llama/Llama-2-70b-chat-hf" 231 elif self.LLM_API_TYPE == ApiType.DEEP_INFRA: 232 self.LLM_API_BASE = ( 233 self.LLM_API_BASE or "https://api.deepinfra.com/v1/openai" 234 ) 235 self.MODEL = self.MODEL or "meta-llama/Llama-2-70b-chat-hf" 236 elif self.LLM_API_TYPE == ApiType.ANTHROPIC: 237 self.LLM_API_BASE = self.LLM_API_BASE or "https://api.anthropic.com/" 238 self.MODEL = self.MODEL or "claude-3-opus-20240229" 239 self.LLM_API_KEY = self.LLM_API_KEY or self.ANTHROPIC_API_KEY 240 else: 241 self.LLM_API_BASE = self.LLM_API_BASE or self.OPENAI_API_BASE 242 self.LLM_API_KEY = self.LLM_API_KEY or self.OPENAI_API_KEY 243 self.LLM_API_VERSION = self.LLM_API_VERSION or self.OPENAI_API_VERSION 244 self.MODEL = self.MODEL or "gpt-3.5-turbo" 245 246 def _validate_local_llm(self): 247 if self.CHAT_MODE is None: 248 logging.warning( 249 "When using local models, " 250 "(bool)CHAT_MODE configuration option should be explicitly set" 251 ) 252 if self.LLM_API_TYPE == ApiType.FUNCTION: 253 if not self.INFERENCE_FUNC: 254 raise LLMConfigError( 255 "LLM configuration error: " 256 "INFERENCE_FUNC should be provided for local models" 257 ) 258 elif self.LLM_API_TYPE == ApiType.TRANSFORMERS: 259 if not self.MODEL: 260 raise LLMConfigError( 261 "LLM configuration error: " 262 "MODEL should be provided for local transformers models" 263 ) 264 265 def validate(self): 266 """ 267 Validate LLM configuration 268 269 Raises: 270 LLMConfigError 271 
""" 272 if self.LLM_API_TYPE == ApiType.NONE: 273 return 274 if self.uses_local_model(): 275 self._validate_local_llm() 276 return 277 if self.INFERENCE_FUNC: 278 raise LLMConfigError( 279 "LLM configuration error: INFERENCE_FUNC should be provided only for local models" 280 ) 281 if self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI: 282 if ( 283 not self.GOOGLE_VERTEX_ACCESS_TOKEN 284 and not self.GOOGLE_VERTEX_GCLOUD_AUTH 285 ): 286 raise LLMConfigError( 287 "LLM configuration error: " 288 "GOOGLE_VERTEX_ACCESS_TOKEN should be provided " 289 "or GOOGLE_VERTEX_GCLOUD_AUTH should be enabled" 290 ) 291 else: 292 if not self.LLM_API_KEY: 293 raise LLMConfigError("LLM configuration error: LLM_API_KEY is absent") 294 if self.LLM_API_TYPE == ApiType.AZURE: 295 if not self.LLM_API_BASE: 296 raise LLMConfigError( 297 "LLM configuration error: " 298 "LLM_API_BASE is required for using Azure models" 299 ) 300 if not self.LLM_DEPLOYMENT_ID: 301 raise LLMConfigError( 302 "LLM configuration error: " 303 "LLM_DEPLOYMENT_ID is required for using Azure models" 304 ) 305 if not self.LLM_API_VERSION: 306 raise LLMConfigError( 307 "LLM configuration error: " 308 "LLM_API_VERSION is required for using Azure models" 309 ) 310 311 def describe(self, return_dict=False): 312 """ 313 Informal description of the configuration 314 """ 315 prev_env = os.environ.copy() 316 os.environ.clear() 317 default = Config(LLM_API_TYPE=ApiType.NONE, USE_DOT_ENV=False) 318 os.environ.update(prev_env) 319 data = { 320 k.lower().replace("llm_", ""): v 321 for k, v in dict(self).items() 322 if v is not None and v != getattr(default, k) and k != "USE_DOT_ENV" 323 } 324 for k, v in data.items(): 325 if "_key" in k and isinstance(v, str): 326 if len(v) <= 3: 327 continue 328 data[k] = v[: 1 if len(v) <= 12 else 3] + "****" + v[-2:] 329 if return_dict: 330 return data 331 332 print("Config:") 333 for k, v in data.items(): 334 print(f" {k}: {Fore.GREEN}{v}{Fore.RESET}") 335 return None
LLM configuration
See ApiType.
To use services that are not listed in ApiType
but provide an OpenAI-compatible API, use ApiType.OPEN_AI
Will enforce using specific encoding for token size measurement
You may specify here default arguments for the LLM API calls, e.g. temperature, max_tokens, etc.
265 def validate(self): 266 """ 267 Validate LLM configuration 268 269 Raises: 270 LLMConfigError 271 """ 272 if self.LLM_API_TYPE == ApiType.NONE: 273 return 274 if self.uses_local_model(): 275 self._validate_local_llm() 276 return 277 if self.INFERENCE_FUNC: 278 raise LLMConfigError( 279 "LLM configuration error: INFERENCE_FUNC should be provided only for local models" 280 ) 281 if self.LLM_API_TYPE == ApiType.GOOGLE_VERTEX_AI: 282 if ( 283 not self.GOOGLE_VERTEX_ACCESS_TOKEN 284 and not self.GOOGLE_VERTEX_GCLOUD_AUTH 285 ): 286 raise LLMConfigError( 287 "LLM configuration error: " 288 "GOOGLE_VERTEX_ACCESS_TOKEN should be provided " 289 "or GOOGLE_VERTEX_GCLOUD_AUTH should be enabled" 290 ) 291 else: 292 if not self.LLM_API_KEY: 293 raise LLMConfigError("LLM configuration error: LLM_API_KEY is absent") 294 if self.LLM_API_TYPE == ApiType.AZURE: 295 if not self.LLM_API_BASE: 296 raise LLMConfigError( 297 "LLM configuration error: " 298 "LLM_API_BASE is required for using Azure models" 299 ) 300 if not self.LLM_DEPLOYMENT_ID: 301 raise LLMConfigError( 302 "LLM configuration error: " 303 "LLM_DEPLOYMENT_ID is required for using Azure models" 304 ) 305 if not self.LLM_API_VERSION: 306 raise LLMConfigError( 307 "LLM configuration error: " 308 "LLM_API_VERSION is required for using Azure models" 309 )
Validate LLM configuration
Raises:
- LLMConfigError
311 def describe(self, return_dict=False): 312 """ 313 Informal description of the configuration 314 """ 315 prev_env = os.environ.copy() 316 os.environ.clear() 317 default = Config(LLM_API_TYPE=ApiType.NONE, USE_DOT_ENV=False) 318 os.environ.update(prev_env) 319 data = { 320 k.lower().replace("llm_", ""): v 321 for k, v in dict(self).items() 322 if v is not None and v != getattr(default, k) and k != "USE_DOT_ENV" 323 } 324 for k, v in data.items(): 325 if "_key" in k and isinstance(v, str): 326 if len(v) <= 3: 327 continue 328 data[k] = v[: 1 if len(v) <= 12 else 3] + "****" + v[-2:] 329 if return_dict: 330 return data 331 332 print("Config:") 333 for k, v in data.items(): 334 print(f" {k}: {Fore.GREEN}{v}{Fore.RESET}") 335 return None
Informal description of the configuration
Inherited Members
LLM configuration error
Inherited Members
- builtins.ValueError
- ValueError
- builtins.BaseException
- with_traceback
- add_note
- args
342@dataclass 343class Config(LLMConfig): 344 """MicroCore configuration""" 345 346 USE_LOGGING: bool = from_env(default=False) 347 """Whether to use logging or not, see `microcore.use_logging`""" 348 349 PROMPT_TEMPLATES_PATH: str | Path = from_env("tpl") 350 """Path to the folder with prompt templates, ./tpl by default""" 351 352 STORAGE_PATH: str | Path = from_env("storage") 353 """Path to the folder with file storage, ./storage by default""" 354 355 STORAGE_DEFAULT_FILE_EXT: str = from_env(default="") 356 357 EMBEDDING_DB_FOLDER: str = from_env(default="embedding_db") 358 """Folder within microcore.config.Config.STORAGE_PATH for storing embeddings""" 359 360 EMBEDDING_DB_FUNCTION: Any = from_env() 361 362 EMBEDDING_DB_ALLOW_DUPLICATES: bool = from_env(dtype=bool, default=False) 363 364 DEFAULT_ENCODING: str = from_env("utf-8") 365 """Used in file system operations, utf-8 by default""" 366 367 JINJA2_AUTO_ESCAPE: bool = from_env(dtype=bool, default=False) 368 369 ELEVENLABS_API_KEY: str = from_env() 370 371 TEXT_TO_SPEECH_PATH: str | Path = from_env() 372 """Path to the folder with generated voice files""" 373 374 MAX_CONCURRENT_TASKS: int = from_env(default=None) 375 376 def __post_init__(self): 377 super().__post_init__() 378 if self.TEXT_TO_SPEECH_PATH is None: 379 self.TEXT_TO_SPEECH_PATH = Path(self.STORAGE_PATH) / "voicing"
MicroCore configuration
Path to the folder with prompt templates, ./tpl by default
Path to the folder with file storage, ./storage by default
Path to the folder with generated voice files