vllm-project · AlonKellner-RedHat · Oct 29, 2025 · Oct 29, 2025 · Oct 29, 2025 · Oct 29, 2025
diff --git a/pylock.toml b/pylock.toml
diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
@@ -370,6 +370,16 @@ def benchmark():
     default=BenchmarkGenerativeTextArgs.get_default("max_global_error_rate"),
     help="Maximum global error rate across all benchmarks.",
 )
+@click.option(
+    "--stop-over-saturated",
+    type=bool,
+    default=BenchmarkGenerativeTextArgs.get_default("stop_over_saturated"),
+    help=(
+        "Set this flag to stop the benchmark if the model is over-saturated. "
+        "Defaults to False."
+    ),
+    is_flag=True,
+)
 def run(**kwargs):
     request_type = kwargs.pop("request_type", None)
     request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)

diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py
@@ -267,6 +267,7 @@ async def resolve_profile(
     max_errors: int | None,
     max_error_rate: float | None,
     max_global_error_rate: float | None,
+    stop_over_saturated: bool | None = None,
     console: Console | None = None,
 ) -> Profile:
     """
@@ -281,6 +282,7 @@ async def resolve_profile(
     :param max_errors: Maximum number of errors before stopping
     :param max_error_rate: Maximum error rate threshold before stopping
     :param max_global_error_rate: Maximum global error rate threshold before stopping
+    :param stop_over_saturated: Whether to stop if over-saturation is detected
     :param console: Console instance for progress reporting, or None
     :return: Configured Profile instance ready for benchmarking
     :raises ValueError: If constraints are provided with a pre-configured Profile
@@ -297,6 +299,7 @@ async def resolve_profile(
         "max_errors": max_errors,
         "max_error_rate": max_error_rate,
         "max_global_error_rate": max_global_error_rate,
+        "stop_over_saturated": stop_over_saturated,
     }.items():
         if val is not None:
             constraints[key] = val
@@ -412,6 +415,7 @@ async def benchmark_generative_text(
         max_errors=args.max_errors,
         max_error_rate=args.max_error_rate,
         max_global_error_rate=args.max_global_error_rate,
+        stop_over_saturated=args.stop_over_saturated,
         console=console,
     )
     output_formats = await resolve_output_formats(

diff --git a/src/guidellm/benchmark/progress.py b/src/guidellm/benchmark/progress.py
@@ -18,7 +18,6 @@
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from datetime import datetime
 from typing import Any, Generic, Literal
 
 from rich.console import Group
@@ -42,7 +41,7 @@
     GenerativeBenchmark,
 )
 from guidellm.scheduler import SchedulerState, SchedulingStrategy, StrategyType
-from guidellm.utils import Colors, format_value_display
+from guidellm.utils import Colors, format_value_display, safe_format_timestamp
 
 __all__ = ["BenchmarkerProgress", "GenerativeConsoleBenchmarkerProgress"]
 
@@ -383,7 +382,7 @@ def formatted_start_time(self) -> str:
         if self.start_time < 0.0:
             return "--:--:--"
 
-        return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S")
+        return safe_format_timestamp(self.start_time, format_="%H:%M:%S")
 
     @property
     def formatted_progress_status(self) -> str:

diff --git a/src/guidellm/benchmark/schemas.py b/src/guidellm/benchmark/schemas.py
@@ -1952,6 +1952,10 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
     max_global_error_rate: float | None = Field(
         default=None, description="Maximum global error rate (0-1) before stopping"
     )
+    stop_over_saturated: bool | None = Field(
+        default=None,
+        description="Whether to stop the benchmark if the model is over-saturated",
+    )
 
     @field_validator("data", "data_args", "rate", mode="wrap")
     @classmethod

diff --git a/src/guidellm/scheduler/__init__.py b/src/guidellm/scheduler/__init__.py
@@ -19,6 +19,9 @@
     MaxErrorsConstraint,
     MaxGlobalErrorRateConstraint,
     MaxNumberConstraint,
+    OverSaturationConstraint,
+    OverSaturationConstraintInitializer,
+    OverSaturationDetector,
     PydanticConstraintInitializer,
     SerializableConstraintInitializer,
     UnserializableConstraintInitializer,
@@ -66,6 +69,9 @@
     "MaxNumberConstraint",
     "MultiTurnRequestT",
     "NonDistributedEnvironment",
+    "OverSaturationConstraint",
+    "OverSaturationConstraintInitializer",
+    "OverSaturationDetector",
     "PydanticConstraintInitializer",
     "RequestT",
     "ResponseT",

diff --git a/src/guidellm/scheduler/constraints/__init__.py b/src/guidellm/scheduler/constraints/__init__.py
@@ -0,0 +1,51 @@
+"""
+Constraint system for scheduler behavior control and request processing limits.
+
+Re-exports the constraint protocols, initializer base classes, the initializer
+factory, and the concrete constraints from this package's submodules: limits on
+duration, error counts/rates, and request counts, plus over-saturation stopping.
+Constraints evaluate scheduler state and individual requests to decide whether
+processing should continue or stop.
+"""
+
+from .base import (
+    PydanticConstraintInitializer,
+    UnserializableConstraintInitializer,
+)
+from .factory import ConstraintsInitializerFactory
+from .over_saturation import (
+    OverSaturationConstraint,
+    OverSaturationConstraintInitializer,
+    OverSaturationDetector,
+)
+from .protocols import (
+    Constraint,
+    ConstraintInitializer,
+    SerializableConstraintInitializer,
+)
+from .standard import (
+    MaxDurationConstraint,
+    MaxErrorRateConstraint,
+    MaxErrorsConstraint,
+    MaxGlobalErrorRateConstraint,
+    MaxNumberConstraint,
+    RequestsExhaustedConstraint,
+)
+
+__all__ = [
+    "Constraint",
+    "ConstraintInitializer",
+    "ConstraintsInitializerFactory",
+    "MaxDurationConstraint",
+    "MaxErrorRateConstraint",
+    "MaxErrorsConstraint",
+    "MaxGlobalErrorRateConstraint",
+    "MaxNumberConstraint",
+    "OverSaturationConstraint",
+    "OverSaturationConstraintInitializer",
+    "OverSaturationDetector",
+    "PydanticConstraintInitializer",
+    "RequestsExhaustedConstraint",
+    "SerializableConstraintInitializer",
+    "UnserializableConstraintInitializer",
+]
diff --git a/src/guidellm/scheduler/constraints/base.py b/src/guidellm/scheduler/constraints/base.py
@@ -0,0 +1,139 @@
+"""
+Base classes for constraint initializers.
+
+Provides abstract base classes and utility classes for creating constraint
+initializers with Pydantic validation and serialization support.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any, Literal
+
+from pydantic import Field
+
+from guidellm.scheduler.schemas import SchedulerState, SchedulerUpdateAction
+from guidellm.schemas import RequestInfo
+from guidellm.utils import InfoMixin, StandardBaseModel
+
+from .protocols import Constraint
+
+__all__ = [
+    "PydanticConstraintInitializer",
+    "UnserializableConstraintInitializer",
+]
+
+
+class PydanticConstraintInitializer(StandardBaseModel, ABC, InfoMixin):
+    """
+    Abstract base for Pydantic-based constraint initializers.
+
+    Provides standardized serialization, validation, and metadata handling for
+    constraint initializers using Pydantic models. Subclasses implement specific
+    constraint creation logic while inheriting validation and persistence support.
+    """
+
+    type_: str = Field(description="Type identifier for the constraint initializer")
+
+    @property
+    def info(self) -> dict[str, Any]:
+        """
+        Extract serializable information from this constraint initializer.
+
+        :return: Result of ``self.model_dump()`` for this initializer
+        """
+        return self.model_dump()
+
+    @classmethod
+    @abstractmethod
+    def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]:
+        """
+        Validate and process arguments for constraint creation.
+
+        Must be implemented by subclasses to handle their specific parameter patterns
+        and validation requirements.
+
+        :param args: Positional arguments passed to the constraint
+        :param kwargs: Keyword arguments passed to the constraint
+        :return: Validated dictionary of parameters for constraint creation
+            (subclass contract; this abstract body is only ``...``)
+        """
+        ...
+
+    @abstractmethod
+    def create_constraint(self, **kwargs) -> Constraint:
+        """
+        Create a constraint instance.
+
+        Must be implemented by subclasses to return their specific constraint type
+        with appropriate configuration and validation.
+
+        :param kwargs: Additional keyword arguments (usually unused)
+        :return: Configured constraint instance
+            (subclass contract; this abstract body is only ``...``)
+        """
+        ...
+
+
+class UnserializableConstraintInitializer(PydanticConstraintInitializer):
+    """
+    Placeholder for constraints that cannot be serialized or executed.
+
+    Stands in for a constraint initializer that could not be serialized, keeping
+    its original information in ``orig_info``. Both ``create_constraint`` and
+    ``__call__`` always raise ``RuntimeError``, so an instance cannot be executed.
+    """
+
+    type_: Literal["unserializable"] = "unserializable"  # type: ignore[assignment]
+    orig_info: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Original constraint information before serialization failure",
+    )
+
+    @classmethod
+    def validated_kwargs(
+        cls,
+        orig_info: dict[str, Any] | None = None,
+        **kwargs,  # noqa: ARG003
+    ) -> dict[str, Any]:
+        """
+        Validate arguments for unserializable constraint creation.
+
+        :param orig_info: Original constraint information before serialization failure
+        :param kwargs: Additional arguments (ignored)
+        :return: Dict with the single key ``orig_info``; a falsy value becomes ``{}``
+        """
+        return {"orig_info": orig_info or {}}
+
+    def create_constraint(
+        self,
+        **kwargs,  # noqa: ARG002
+    ) -> Constraint:
+        """
+        Raise error for unserializable constraint creation attempt.
+
+        :param kwargs: Additional keyword arguments (unused)
+        :raises RuntimeError: Always raised since unserializable constraints
+            cannot be executed
+        """
+        raise RuntimeError(
+            "Cannot create constraint from unserializable constraint instance. "
+            "This constraint cannot be serialized and therefore cannot be executed."
+        )
+
+    def __call__(
+        self,
+        state: SchedulerState,  # noqa: ARG002
+        request: RequestInfo,  # noqa: ARG002
+    ) -> SchedulerUpdateAction:
+        """
+        Raise error since unserializable constraints cannot be invoked.
+
+        :param state: Current scheduler state (unused)
+        :param request: Individual request information (unused)
+        :raises RuntimeError: Always raised for unserializable constraints
+        """
+        raise RuntimeError(
+            "Cannot invoke unserializable constraint instance. "
+            "This constraint was not properly serialized and cannot be executed."
+        )