2 changes: 1 addition & 1 deletion Makefile
@@ -7,4 +7,4 @@ run:
--queue-size 100

install:
pip install -e .
pip install -e .
43 changes: 40 additions & 3 deletions README.md
@@ -108,7 +108,9 @@ The server supports six types of MLX models:

### Flux-Series Image Models

The server supports multiple Flux and Qwen model configurations for advanced image generation and editing:
> **⚠️ Note:** Image generation and editing capabilities require installation of `mflux`: `pip install mlx-openai-server[image-generation]` or `pip install git+https://github.com/cubist38/mflux.git`

The server supports multiple Flux model configurations for advanced image generation and editing:

#### Image Generation Models
- **`flux-schnell`** - Fast generation with 4 default steps, no guidance (best for quick iterations)
@@ -202,6 +204,9 @@ Follow these steps to set up the MLX-powered server:
git clone https://github.com/cubist38/mlx-openai-server.git
cd mlx-openai-server
pip install -e .

# Optional: For image generation/editing support
pip install -e .[image-generation]
```
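
To verify the installation, the console script should report its version (assuming the entry point is named after the package, as the CLI version banner in this PR suggests):

```bash
mlx-openai-server --version
```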

### Using Conda (Recommended)
@@ -236,6 +241,9 @@ For better environment management and to avoid architecture issues, we recommend
git clone https://github.com/cubist38/mlx-openai-server.git
cd mlx-openai-server
pip install -e .

# Optional: For image generation/editing support
pip install -e .[image-generation]
```

### Optional Dependencies
@@ -253,15 +261,44 @@ pip install mlx-openai-server
- All core API endpoints and functionality

#### Image Generation & Editing Support
The server includes support for image generation and editing capabilities:
For image generation and editing capabilities, install with the image-generation extra:

```bash
# Install with image generation support
pip install mlx-openai-server[image-generation]
```

Or install manually:
```bash
# First install the base server
pip install mlx-openai-server

# Then install mflux for image generation/editing support
pip install git+https://github.com/cubist38/mflux.git
```

**Additional features:**
**Additional features with mflux:**
- Image generation models (`--model-type image-generation`)
- Image editing models (`--model-type image-edit`)
- MLX Flux-series model support
- Qwen Image model support
- LoRA adapter support for fine-tuned generation and editing
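
As an illustration, here is a hypothetical launch command for an image-generation model (the entry-point name, flag spellings, and paths are assumptions inferred from the options shown in this PR; adjust them to your setup):

```bash
# Hypothetical invocation: serve a locally downloaded Flux model
mlx-openai-server launch \
  --model-path /path/to/flux-schnell \
  --model-type image-generation \
  --config-name flux-schnell
```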

#### Enhanced Caching Support
For enhanced caching and performance when working with complex ML models and objects, install with the enhanced-caching extra:

```bash
# Install with enhanced caching support
pip install mlx-openai-server[enhanced-caching]
```

This enables better serialization and caching of objects from:
- spaCy (NLP processing)
- regex (regular expressions)
- tiktoken (tokenization)
- torch (PyTorch tensors and models)
- transformers (Hugging Face models)

#### Whisper Models Support
For whisper models to work properly, you need to install ffmpeg:

8 changes: 1 addition & 7 deletions app/__init__.py
@@ -1,7 +1 @@
import os
from .version import __version__

# Suppress transformers warnings
os.environ['TRANSFORMERS_VERBOSITY'] = 'error'

__all__ = ["__version__"]
"""MLX OpenAI Server package."""
99 changes: 71 additions & 28 deletions app/cli.py
@@ -5,6 +5,8 @@
the ASGI server.
"""

from __future__ import annotations

import asyncio
import sys

@@ -17,7 +19,7 @@
from .version import __version__


class UpperChoice(click.Choice):
class UpperChoice(click.Choice[str]):
"""Case-insensitive choice type that returns uppercase values.

This small convenience subclass normalizes user input in a
@@ -26,7 +28,7 @@ class UpperChoice(click.Choice):
where the internal representation is uppercased.
"""

def normalize_choice(self, choice, ctx):
def normalize_choice(self, choice: str | None, ctx: click.Context | None) -> str | None: # type: ignore[override]
"""Return the canonical uppercase choice or raise BadParameter.

Parameters
@@ -75,20 +77,19 @@ def normalize_choice(self, choice, ctx):
🚀 Version: %(version)s
""",
)
def cli():
def cli() -> None:
"""Top-level Click command group for the MLX server CLI.

Subcommands (such as ``launch``) are registered on this group and
invoked by the console entry point.
"""
pass


@cli.command()
@cli.command(help="Start the MLX OpenAI Server with the supplied flags")
@click.option(
"--model-path",
required=True,
help="Path to the model (required for lm, multimodal, embeddings, image-generation, image-edit, whisper model types). With `image-generation` or `image-edit` model types, it should be the local path to the model.",
help="Path to the model (required for lm, multimodal, embeddings, image-generation, image-edit, whisper model types). Can be a local path or Hugging Face repository ID (e.g., 'blackforestlabs/FLUX.1-dev').",
)
@click.option(
"--model-type",
@@ -186,35 +187,77 @@ def cli():
help="Path to a custom chat template file. Only works with language models (lm) and multimodal models.",
)
def launch(
model_path,
model_type,
context_length,
port,
host,
max_concurrency,
queue_timeout,
queue_size,
quantize,
config_name,
lora_paths,
lora_scales,
disable_auto_resize,
log_file,
no_log_file,
log_level,
enable_auto_tool_choice,
tool_call_parser,
reasoning_parser,
trust_remote_code,
chat_template_file,
model_path: str,
model_type: str,
context_length: int,
port: int,
host: str,
max_concurrency: int,
queue_timeout: int,
queue_size: int,
quantize: int,
config_name: str | None,
lora_paths: str | None,
lora_scales: str | None,
disable_auto_resize: bool,
log_file: str | None,
no_log_file: bool,
log_level: str,
enable_auto_tool_choice: bool,
tool_call_parser: str | None,
reasoning_parser: str | None,
trust_remote_code: bool,
chat_template_file: str | None,
) -> None:
"""Start the FastAPI/Uvicorn server with the supplied flags.

The command builds a server configuration object using
``MLXServerConfig`` and then calls the async ``start`` routine
which handles the event loop and server lifecycle.
"""

Parameters
----------
model_path : str
Path to the model (required for lm, multimodal, embeddings, image-generation, image-edit, whisper model types).
model_type : str
Type of model to run (lm, multimodal, image-generation, image-edit, embeddings, whisper).
context_length : int
Context length for language models.
port : int
Port to run the server on.
host : str
Host to run the server on.
max_concurrency : int
Maximum number of concurrent requests.
queue_timeout : int
Request timeout in seconds.
queue_size : int
Maximum queue size for pending requests.
quantize : int
Quantization level for the model.
config_name : str or None
Config name of the model.
lora_paths : str or None
Path to the LoRA file(s).
lora_scales : str or None
Scale factor for the LoRA file(s).
disable_auto_resize : bool
Disable automatic model resizing.
log_file : str or None
Path to log file.
no_log_file : bool
Disable file logging entirely.
log_level : str
Set the logging level.
enable_auto_tool_choice : bool
Enable automatic tool choice.
tool_call_parser : str or None
Specify tool call parser to use.
reasoning_parser : str or None
Specify reasoning parser to use.
trust_remote_code : bool
Enable trust_remote_code when loading models.
chat_template_file : str or None
Path to a custom chat template file (used only with lm and multimodal models).
"""
args = MLXServerConfig(
model_path=model_path,
model_type=model_type,
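The body of `normalize_choice` is collapsed in this view. As a minimal sketch of the behavior the docstrings describe (case-insensitive input canonicalized to uppercase, with `BadParameter` raised for unknown values), assuming nothing beyond the public `click.Choice` API; this is an illustration, not the repository's actual implementation:

```python
from __future__ import annotations

import click


class UpperChoice(click.Choice):
    """Case-insensitive choice type that returns uppercase values (sketch)."""

    def normalize_choice(self, choice: str | None, ctx: click.Context | None) -> str | None:
        # Sketch only: accept any casing, canonicalize to uppercase, and
        # reject values that are not among the declared choices.
        if choice is None:
            return None
        upper = str(choice).upper()
        if upper not in self.choices:
            raise click.BadParameter(
                f"{choice!r} is not one of: {', '.join(self.choices)}", ctx=ctx
            )
        return upper
```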
9 changes: 3 additions & 6 deletions app/config.py
@@ -47,15 +47,14 @@ class MLXServerConfig:
lora_paths_str: str | None = None
lora_scales_str: str | None = None

def __post_init__(self):
def __post_init__(self) -> None:
"""Normalize certain CLI fields after instantiation.

- Convert comma-separated ``lora_paths`` and ``lora_scales`` into
lists when provided.
- Apply small model-type-specific defaults for ``config_name``
and emit warnings when values appear inconsistent.
"""

# Process comma-separated LoRA paths and scales into lists (or None)
if self.lora_paths_str:
self.lora_paths = [p.strip() for p in self.lora_paths_str.split(",") if p.strip()]
@@ -74,11 +73,9 @@ def __post_init__(self):
# image-edit model types. If missing for those types, set defaults.
if self.config_name and self.model_type not in ["image-generation", "image-edit"]:
logger.warning(
"Config name parameter '%s' provided but model type is '%s'. "
f"Config name parameter '{self.config_name}' provided but model type is '{self.model_type}'. "
"Config name is only used with image-generation "
"and image-edit models.",
self.config_name,
self.model_type,
"and image-edit models."
)
elif self.model_type == "image-generation" and not self.config_name:
logger.warning(
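As a quick illustration of the comma-splitting in `__post_init__` (the values here are hypothetical):

```python
lora_paths_str = "adapters/style.safetensors, adapters/detail.safetensors"
lora_paths = [p.strip() for p in lora_paths_str.split(",") if p.strip()]
print(lora_paths)  # ['adapters/style.safetensors', 'adapters/detail.safetensors']
```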
27 changes: 20 additions & 7 deletions app/main.py
@@ -27,13 +27,19 @@
from .version import __version__


def print_startup_banner(config_args):
"""Log a compact startup banner describing the selected config.
def print_startup_banner(config_args: MLXServerConfig) -> None:
"""
Log a compact startup banner describing the selected config.

The function emits human-friendly log messages that summarize the
runtime configuration (model path/type, host/port, concurrency,
LoRA settings, and logging options). Intended for the user-facing
startup output only.

Parameters
----------
config_args : MLXServerConfig
Configuration object containing runtime settings to display.
"""
logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
logger.info(f"✨ MLX Server v{__version__} Starting ✨")
@@ -78,12 +84,18 @@ def print_startup_banner(config_args):


async def start(config: MLXServerConfig) -> None:
"""Run the ASGI server using the provided configuration.
"""
Run the ASGI server using the provided configuration.

This coroutine wires the configuration into the server setup
routine, logs progress, and starts the Uvicorn server. It handles
KeyboardInterrupt and logs any startup failures before exiting the
process with a non-zero code.

Parameters
----------
config : MLXServerConfig
Configuration object for server setup.
"""
try:
# Display startup information
@@ -98,19 +110,20 @@ async def start(config: MLXServerConfig) -> None:
except KeyboardInterrupt:
logger.info("Server shutdown requested by user. Exiting...")
except Exception as e:
logger.error(f"Server startup failed: {str(e)}")
logger.error(f"Server startup failed. {type(e).__name__}: {e}")
sys.exit(1)


def main():
"""Normalize process args and dispatch to the Click CLI.
def main() -> None:
"""
Normalize process args and dispatch to the Click CLI.

This helper gathers command-line arguments, inserts the "launch"
subcommand when a subcommand is omitted for backwards compatibility,
and delegates execution to :func:`app.cli.cli` through
``cli.main``.
"""
from .cli import cli
from .cli import cli # noqa: PLC0415

args = [str(x) for x in sys.argv[1:]]
# Keep backwards compatibility: Add 'launch' subcommand if none is provided
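Given the backwards-compatibility shim described above, both invocation styles below should behave identically (hypothetical flags; the exact dispatch check is collapsed in this view):

```bash
# Legacy style: the "launch" subcommand is inserted automatically
mlx-openai-server --model-path ./my-model --model-type lm

# Explicit style
mlx-openai-server launch --model-path ./my-model --model-type lm
```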