|
5 | 5 | from pathlib import Path |
6 | 6 | from typing import ( |
7 | 7 | Any, |
| 8 | + Callable, |
8 | 9 | Generic, |
9 | | - Literal, |
10 | 10 | Optional, |
11 | 11 | Union, |
12 | 12 | ) |
13 | 13 |
|
14 | | -from pydantic import Field |
15 | 14 | from transformers import PreTrainedTokenizerBase # type: ignore # noqa: PGH003 |
16 | 15 |
|
17 | | -from guidellm.backend import Backend, ResponseSummary |
| 16 | +from guidellm.backend import Backend |
18 | 17 | from guidellm.benchmark.aggregator import ( |
19 | 18 | AggregatorT, |
20 | 19 | BenchmarkT, |
21 | 20 | GenerativeBenchmarkAggregator, |
22 | 21 | ) |
23 | 22 | from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark |
24 | 23 | from guidellm.benchmark.profile import Profile |
25 | | -from guidellm.objects import StandardBaseModel |
26 | 24 | from guidellm.request import ( |
27 | | - GenerationRequest, |
28 | 25 | GenerativeRequestLoaderDescription, |
29 | | - RequestLoaderDescription, |
30 | 26 | ) |
31 | 27 | from guidellm.scheduler import ( |
32 | | - GenerativeRequestsWorker, |
33 | | - RequestsWorker, |
| 28 | + BackendT, |
| 29 | + Environment, |
34 | 30 | RequestT, |
35 | 31 | ResponseT, |
36 | | - Scheduler, |
37 | | - SchedulerRequestResult, |
| 32 | + ScheduledRequestInfo, |
| 33 | + SchedulerState, |
| 34 | + SchedulerUpdateAction, |
38 | 35 | SchedulingStrategy, |
39 | 36 | ) |
| 37 | +from guidellm.utils import ThreadSafeSingletonMixin |
40 | 38 |
|
41 | | -__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"] |
| 39 | +__all__ = ["Benchmarker", "GenerativeBenchmarker"] |
42 | 40 |
|
43 | 41 |
|
44 | | -class BenchmarkerResult( |
45 | | - StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT] |
46 | | -): |
47 | | - type_: Literal[ |
48 | | - "run_start", |
49 | | - "run_complete", |
50 | | - "scheduler_start", |
51 | | - "scheduler_update", |
52 | | - "scheduler_complete", |
53 | | - "benchmark_compiled", |
54 | | - ] |
55 | | - start_time: float |
56 | | - end_number: int |
57 | | - profile: Profile |
58 | | - current_index: int |
59 | | - current_strategy: Optional[SchedulingStrategy] = None |
60 | | - current_aggregator: Optional[AggregatorT] = None |
61 | | - current_benchmark: Optional[BenchmarkT] = None |
62 | | - current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None |
63 | | - |
64 | | - |
65 | | -class BenchmarkerStrategyLimits(StandardBaseModel): |
66 | | - requests_loader_size: Optional[int] = Field( |
67 | | - description="Size of the request loader.", |
68 | | - ) |
69 | | - max_number_per_strategy: Optional[int] = Field( |
70 | | - description="Maximum number of requests to process per strategy.", |
71 | | - ge=0, |
72 | | - ) |
73 | | - max_duration_per_strategy: Optional[float] = Field( |
74 | | - description="Maximum duration (in seconds) to process requests per strategy.", |
75 | | - ge=0, |
76 | | - ) |
77 | | - warmup_percent_per_strategy: Optional[float] = Field( |
78 | | - description="Percentage of requests to use for warmup.", |
79 | | - ge=0, |
80 | | - le=1, |
81 | | - ) |
82 | | - cooldown_percent_per_strategy: Optional[float] = Field( |
83 | | - description="Percentage of requests to use for cooldown.", |
84 | | - ge=0, |
85 | | - le=1, |
86 | | - ) |
87 | | - |
88 | | - @property |
89 | | - def max_number(self) -> Optional[int]: |
90 | | - if self.max_number_per_strategy is not None: |
91 | | - return self.max_number_per_strategy |
92 | | - |
93 | | - if self.requests_loader_size is not None: |
94 | | - return self.requests_loader_size |
95 | | - |
96 | | - return None |
97 | | - |
98 | | - @property |
99 | | - def max_duration(self) -> Optional[float]: |
100 | | - return self.max_duration_per_strategy |
101 | | - |
102 | | - @property |
103 | | - def warmup_number(self) -> Optional[int]: |
104 | | - if self.warmup_percent_per_strategy is None or self.max_number is None: |
105 | | - return None |
106 | | - |
107 | | - return int(self.warmup_percent_per_strategy * self.max_number) |
108 | | - |
109 | | - @property |
110 | | - def warmup_duration(self) -> Optional[float]: |
111 | | - if self.warmup_percent_per_strategy is None or self.max_duration is None: |
112 | | - return None |
113 | | - |
114 | | - return self.warmup_percent_per_strategy * self.max_duration |
115 | | - |
116 | | - @property |
117 | | - def cooldown_number(self) -> Optional[int]: |
118 | | - if self.cooldown_percent_per_strategy is None or self.max_number is None: |
119 | | - return None |
120 | | - |
121 | | - return int(self.cooldown_percent_per_strategy * self.max_number) |
| 42 | +""" |
| 43 | +Scheduler: |
| 44 | + requests: Iterable[ |
| 45 | + Union[RequestT, Iterable[Union[RequestT, tuple[RequestT, float]]]] |
| 46 | + ], |
| 47 | + backend: BackendT[RequestT, ResponseT], |
| 48 | + strategy: SchedulingStrategy, |
| 49 | + env: Environment, |
| 50 | + **constraints: dict[ |
| 51 | + str, Union[int, float, str, ConstraintsResolveArgs, CallableConstraint] |
| 52 | + ], |
122 | 53 |
|
123 | | - @property |
124 | | - def cooldown_duration(self) -> Optional[float]: |
125 | | - if self.cooldown_percent_per_strategy is None or self.max_duration is None: |
126 | | - return None |
| 54 | +CallableConstraint = Callable[ |
| 55 | + [SchedulerState, ScheduledRequestInfo], SchedulerUpdateAction |
| 56 | +] |
| 57 | +""" |
127 | 58 |
|
128 | | - return self.cooldown_percent_per_strategy * self.max_duration |
129 | 59 |
|
| 60 | +CallableConstraintInitializer = Callable[ |
| 61 | + [AggregatorT, BenchmarkT], |
| 62 | + Callable[[SchedulerState, ScheduledRequestInfo], SchedulerUpdateAction], |
| 63 | +] |
130 | 64 |
|
131 | | -class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC): |
132 | | - def __init__( |
133 | | - self, |
134 | | - worker: RequestsWorker[RequestT, ResponseT], |
135 | | - request_loader: Iterable[RequestT], |
136 | | - requests_loader_description: RequestLoaderDescription, |
137 | | - benchmark_save_extras: Optional[dict[str, Any]] = None, |
138 | | - ): |
139 | | - self.worker = worker |
140 | | - self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler( |
141 | | - worker=worker, request_loader=request_loader |
142 | | - ) |
143 | | - self.requests_loader_description = requests_loader_description |
144 | | - self.benchmark_save_extras = benchmark_save_extras |
145 | 65 |
|
| 66 | +class Benchmarker( |
| 67 | + Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC, ThreadSafeSingletonMixin |
| 68 | +): |
146 | 69 | async def run( |
147 | 70 | self, |
| 71 | + requests: Iterable[ |
| 72 | + Union[RequestT, Iterable[Union[RequestT, tuple[RequestT, float]]]] |
| 73 | + ], |
| 74 | + backend: BackendT[RequestT, ResponseT], |
148 | 75 | profile: Profile, |
149 | | - max_number_per_strategy: Optional[int], |
150 | | - max_duration_per_strategy: Optional[float], |
151 | | - warmup_percent_per_strategy: Optional[float], |
152 | | - cooldown_percent_per_strategy: Optional[float], |
| 76 | + environment: Environment, |
| 77 | + aggregator: type[AggregatorT], |
153 | 78 | ) -> AsyncGenerator[ |
154 | 79 | BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None |
155 | 80 | ]: |
|
0 commit comments