Skip to content

Commit c563c8c

Browse files
committed
Add per-provider concurrency limits to proxy
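Introduces a 'max_concurrent' setting for each provider in the proxy configuration that caps the number of concurrent requests sent to that provider; the default, None, means no limit. Before sending a request, the client tries to acquire a slot for the provider (waiting up to 0.5 seconds); if the provider is at max capacity, the client records an error for it and moves on to the next provider. The slot is always released once the request finishes, whether it succeeds or fails. Configuration validation requires 'max_concurrent' to be a positive integer or None; any other value is logged as a warning and reset to None, which removes the limit.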
1 parent 01100da commit c563c8c

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

optillm/plugins/proxy/client.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,13 @@ def create(self, **kwargs):
215215

216216
attempted_providers.add(provider)
217217

218+
# Try to acquire a slot for this provider (with short timeout to try next provider quickly)
219+
slot_timeout = 0.5 # Don't wait too long for a single provider
220+
if not provider.acquire_slot(timeout=slot_timeout):
221+
logger.debug(f"Provider {provider.name} at max capacity, trying next provider")
222+
errors.append((provider.name, "At max concurrent requests"))
223+
continue
224+
218225
try:
219226
# Map model name if needed and filter out OptiLLM-specific parameters
220227
request_kwargs = self._filter_kwargs(kwargs.copy())
@@ -255,6 +262,11 @@ def create(self, **kwargs):
255262
if self.proxy_client.track_errors:
256263
provider.is_healthy = False
257264
provider.last_error = str(e)
265+
266+
finally:
267+
# Always release the provider slot
268+
provider.release_slot()
269+
logger.debug(f"Released slot for provider {provider.name}")
258270

259271
# All providers failed, try fallback client
260272
if self.proxy_client.fallback_client:

optillm/plugins/proxy/config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ def _apply_defaults(config: Dict) -> Dict:
172172
provider.setdefault('weight', 1)
173173
provider.setdefault('fallback_only', False)
174174
provider.setdefault('model_map', {})
175+
# Per-provider concurrency limit (None means no limit)
176+
provider.setdefault('max_concurrent', None)
175177

176178
return config
177179

@@ -200,6 +202,12 @@ def _validate_config(config: Dict) -> Dict:
200202
if provider['weight'] <= 0:
201203
logger.warning(f"Provider {provider['name']} has invalid weight {provider['weight']}, setting to 1")
202204
provider['weight'] = 1
205+
206+
# Validate max_concurrent if specified
207+
if provider.get('max_concurrent') is not None:
208+
if not isinstance(provider['max_concurrent'], int) or provider['max_concurrent'] <= 0:
209+
logger.warning(f"Provider {provider['name']} has invalid max_concurrent {provider['max_concurrent']}, removing limit")
210+
provider['max_concurrent'] = None
203211

204212
# Validate routing strategy
205213
valid_strategies = ['weighted', 'round_robin', 'failover']

0 commit comments

Comments
 (0)