2 changes: 1 addition & 1 deletion Makefile
@@ -7,4 +7,4 @@ run:
--queue-size 100

install:
pip install -e .
pip install -e .
43 changes: 40 additions & 3 deletions README.md
@@ -108,7 +108,9 @@ The server supports six types of MLX models:

### Flux-Series Image Models

The server supports multiple Flux and Qwen model configurations for advanced image generation and editing:
> **⚠️ Note:** Image generation and editing capabilities require installation of `mflux`: `pip install mlx-openai-server[image-generation]` or `pip install git+https://github.com/cubist38/mflux.git`

The server supports multiple Flux model configurations for advanced image generation and editing:

#### Image Generation Models
- **`flux-schnell`** - Fast generation with 4 default steps, no guidance (best for quick iterations)
@@ -202,6 +204,9 @@ Follow these steps to set up the MLX-powered server:
git clone https://github.com/cubist38/mlx-openai-server.git
cd mlx-openai-server
pip install -e .

# Optional: For image generation/editing support
pip install -e .[image-generation]
```
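
To verify the installation, the console script should report its version (assuming the entry point is named after the package, as the CLI version banner in this PR suggests):

```bash
mlx-openai-server --version
```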

### Using Conda (Recommended)
@@ -236,6 +241,9 @@ For better environment management and to avoid architecture issues, we recommend
git clone https://github.com/cubist38/mlx-openai-server.git
cd mlx-openai-server
pip install -e .

# Optional: For image generation/editing support
pip install -e .[image-generation]
```

### Optional Dependencies
@@ -253,15 +261,44 @@ pip install mlx-openai-server
- All core API endpoints and functionality

#### Image Generation & Editing Support
The server includes support for image generation and editing capabilities:
For image generation and editing capabilities, install with the image-generation extra:

```bash
# Install with image generation support
pip install mlx-openai-server[image-generation]
```

Or install manually:
```bash
# First install the base server
pip install mlx-openai-server

# Then install mflux for image generation/editing support
pip install git+https://github.com/cubist38/mflux.git
```

**Additional features:**
**Additional features with mflux:**
- Image generation models (`--model-type image-generation`)
- Image editing models (`--model-type image-edit`)
- MLX Flux-series model support
- Qwen Image model support
- LoRA adapter support for fine-tuned generation and editing
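
As an illustration, here is a hypothetical launch command for an image-generation model (the entry-point name, flag spellings, and paths are assumptions inferred from the options shown in this PR; adjust them to your setup):

```bash
# Hypothetical invocation: serve a locally downloaded Flux model
mlx-openai-server launch \
  --model-path /path/to/flux-schnell \
  --model-type image-generation \
  --config-name flux-schnell
```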

#### Enhanced Caching Support
For enhanced caching and performance when working with complex ML models and objects, install with the enhanced-caching extra:

```bash
# Install with enhanced caching support
pip install mlx-openai-server[enhanced-caching]
```

This enables better serialization and caching of objects from:
- spaCy (NLP processing)
- regex (regular expressions)
- tiktoken (tokenization)
- torch (PyTorch tensors and models)
- transformers (Hugging Face models)

#### Whisper Models Support
For whisper models to work properly, you need to install ffmpeg:

8 changes: 1 addition & 7 deletions app/__init__.py
@@ -1,7 +1 @@
import os
from .version import __version__

# Suppress transformers warnings
os.environ['TRANSFORMERS_VERBOSITY'] = 'error'

__all__ = ["__version__"]
"""MLX OpenAI Server package."""
99 changes: 71 additions & 28 deletions app/cli.py
@@ -5,6 +5,8 @@
the ASGI server.
"""

from __future__ import annotations

import asyncio
import sys

@@ -17,7 +19,7 @@
from .version import __version__


class UpperChoice(click.Choice):
class UpperChoice(click.Choice[str]):
"""Case-insensitive choice type that returns uppercase values.

This small convenience subclass normalizes user input in a
@@ -26,7 +28,7 @@ class UpperChoice(click.Choice):
where the internal representation is uppercased.
"""

def normalize_choice(self, choice, ctx):
def normalize_choice(self, choice: str | None, ctx: click.Context | None) -> str | None: # type: ignore[override]
"""Return the canonical uppercase choice or raise BadParameter.

Parameters
@@ -75,20 +77,19 @@ def normalize_choice(self, choice, ctx):
🚀 Version: %(version)s
""",
)
def cli():
def cli() -> None:
"""Top-level Click command group for the MLX server CLI.

Subcommands (such as ``launch``) are registered on this group and
invoked by the console entry point.
"""
pass


@cli.command()
@cli.command(help="Start the MLX OpenAI Server with the supplied flags")
@click.option(
"--model-path",
required=True,
help="Path to the model (required for lm, multimodal, embeddings, image-generation, image-edit, whisper model types). With `image-generation` or `image-edit` model types, it should be the local path to the model.",
help="Path to the model (required for lm, multimodal, embeddings, image-generation, image-edit, whisper model types). Can be a local path or Hugging Face repository ID (e.g., 'blackforestlabs/FLUX.1-dev').",
)
@click.option(
"--model-type",
@@ -186,35 +187,77 @@ def cli():
help="Path to a custom chat template file. Only works with language models (lm) and multimodal models.",
)
def launch(
model_path,
model_type,
context_length,
port,
host,
max_concurrency,
queue_timeout,
queue_size,
quantize,
config_name,
lora_paths,
lora_scales,
disable_auto_resize,
log_file,
no_log_file,
log_level,
enable_auto_tool_choice,
tool_call_parser,
reasoning_parser,
trust_remote_code,
chat_template_file,
model_path: str,
model_type: str,
context_length: int,
port: int,
host: str,
max_concurrency: int,
queue_timeout: int,
queue_size: int,
quantize: int,
config_name: str | None,
lora_paths: str | None,
lora_scales: str | None,
disable_auto_resize: bool,
log_file: str | None,
no_log_file: bool,
log_level: str,
enable_auto_tool_choice: bool,
tool_call_parser: str | None,
reasoning_parser: str | None,
trust_remote_code: bool,
chat_template_file: str | None,
) -> None:
"""Start the FastAPI/Uvicorn server with the supplied flags.

The command builds a server configuration object using
``MLXServerConfig`` and then calls the async ``start`` routine
which handles the event loop and server lifecycle.
"""

Parameters
----------
model_path : str
Path to the model (required for lm, multimodal, embeddings, image-generation, image-edit, whisper model types).
model_type : str
Type of model to run (lm, multimodal, image-generation, image-edit, embeddings, whisper).
context_length : int
Context length for language models.
port : int
Port to run the server on.
host : str
Host to run the server on.
max_concurrency : int
Maximum number of concurrent requests.
queue_timeout : int
Request timeout in seconds.
queue_size : int
Maximum queue size for pending requests.
quantize : int
Quantization level for the model.
config_name : str or None
Config name of the model.
lora_paths : str or None
Path to the LoRA file(s).
lora_scales : str or None
Scale factor for the LoRA file(s).
disable_auto_resize : bool
Disable automatic model resizing.
log_file : str or None
Path to log file.
no_log_file : bool
Disable file logging entirely.
log_level : str
Set the logging level.
enable_auto_tool_choice : bool
Enable automatic tool choice.
tool_call_parser : str or None
Specify tool call parser to use.
reasoning_parser : str or None
Specify reasoning parser to use.
trust_remote_code : bool
Enable trust_remote_code when loading models.
chat_template_file : str or None
Path to a custom chat template file (used only with lm and multimodal models).
"""
args = MLXServerConfig(
model_path=model_path,
model_type=model_type,
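The body of `normalize_choice` is collapsed in this view. As a minimal sketch of the behavior the docstrings describe (case-insensitive input canonicalized to uppercase, with `BadParameter` raised for unknown values), assuming nothing beyond the public `click.Choice` API; this is an illustration, not the repository's actual implementation:

```python
from __future__ import annotations

import click


class UpperChoice(click.Choice):
    """Case-insensitive choice type that returns uppercase values (sketch)."""

    def normalize_choice(self, choice: str | None, ctx: click.Context | None) -> str | None:
        # Sketch only: accept any casing, canonicalize to uppercase, and
        # reject values that are not among the declared choices.
        if choice is None:
            return None
        upper = str(choice).upper()
        if upper not in self.choices:
            raise click.BadParameter(
                f"{choice!r} is not one of: {', '.join(self.choices)}", ctx=ctx
            )
        return upper
```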
9 changes: 3 additions & 6 deletions app/config.py
@@ -47,15 +47,14 @@ class MLXServerConfig:
lora_paths_str: str | None = None
lora_scales_str: str | None = None

def __post_init__(self):
def __post_init__(self) -> None:
"""Normalize certain CLI fields after instantiation.

- Convert comma-separated ``lora_paths`` and ``lora_scales`` into
lists when provided.
- Apply small model-type-specific defaults for ``config_name``
and emit warnings when values appear inconsistent.
"""

# Process comma-separated LoRA paths and scales into lists (or None)
if self.lora_paths_str:
self.lora_paths = [p.strip() for p in self.lora_paths_str.split(",") if p.strip()]
@@ -74,11 +73,9 @@ def __post_init__(self):
# image-edit model types. If missing for those types, set defaults.
if self.config_name and self.model_type not in ["image-generation", "image-edit"]:
logger.warning(
"Config name parameter '%s' provided but model type is '%s'. "
f"Config name parameter '{self.config_name}' provided but model type is '{self.model_type}'. "
"Config name is only used with image-generation "
"and image-edit models.",
self.config_name,
self.model_type,
"and image-edit models."
)
elif self.model_type == "image-generation" and not self.config_name:
logger.warning(
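As a quick illustration of the comma-splitting in `__post_init__` (the values here are hypothetical):

```python
lora_paths_str = "adapters/style.safetensors, adapters/detail.safetensors"
lora_paths = [p.strip() for p in lora_paths_str.split(",") if p.strip()]
print(lora_paths)  # ['adapters/style.safetensors', 'adapters/detail.safetensors']
```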
27 changes: 20 additions & 7 deletions app/main.py
@@ -27,13 +27,19 @@
from .version import __version__


def print_startup_banner(config_args):
"""Log a compact startup banner describing the selected config.
def print_startup_banner(config_args: MLXServerConfig) -> None:
"""
Log a compact startup banner describing the selected config.

The function emits human-friendly log messages that summarize the
runtime configuration (model path/type, host/port, concurrency,
LoRA settings, and logging options). Intended for the user-facing
startup output only.

Parameters
----------
config_args : MLXServerConfig
Configuration object containing runtime settings to display.
"""
logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
logger.info(f"✨ MLX Server v{__version__} Starting ✨")
@@ -78,12 +84,18 @@ def print_startup_banner(config_args):


async def start(config: MLXServerConfig) -> None:
"""Run the ASGI server using the provided configuration.
"""
Run the ASGI server using the provided configuration.

This coroutine wires the configuration into the server setup
routine, logs progress, and starts the Uvicorn server. It handles
KeyboardInterrupt and logs any startup failures before exiting the
process with a non-zero code.

Parameters
----------
config : MLXServerConfig
Configuration object for server setup.
"""
try:
# Display startup information
@@ -98,19 +110,20 @@ async def start(config: MLXServerConfig) -> None:
except KeyboardInterrupt:
logger.info("Server shutdown requested by user. Exiting...")
except Exception as e:
logger.error(f"Server startup failed: {str(e)}")
logger.error(f"Server startup failed. {type(e).__name__}: {e}")
sys.exit(1)


def main():
"""Normalize process args and dispatch to the Click CLI.
def main() -> None:
"""
Normalize process args and dispatch to the Click CLI.

This helper gathers command-line arguments, inserts the "launch"
subcommand when a subcommand is omitted for backwards compatibility,
and delegates execution to :func:`app.cli.cli` through
``cli.main``.
"""
from .cli import cli
from .cli import cli # noqa: PLC0415

args = [str(x) for x in sys.argv[1:]]
# Keep backwards compatibility: Add 'launch' subcommand if none is provided
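Given the backwards-compatibility shim described above, both invocation styles below should behave identically (hypothetical flags; the exact dispatch check is collapsed in this view):

```bash
# Legacy style: the "launch" subcommand is inserted automatically
mlx-openai-server --model-path ./my-model --model-type lm

# Explicit style
mlx-openai-server launch --model-path ./my-model --model-type lm
```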