Skip to content

Commit 76b19bc

Browse files
committed
Linting/Formatting of core folder
1 parent 69f24cd commit 76b19bc

File tree

7 files changed

+532
-314
lines changed

7 files changed

+532
-314
lines changed

app/core/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
"""Core processing utilities for MLX OpenAI server."""
2+
13
from .audio_processor import AudioProcessor
24
from .base_processor import BaseProcessor
35
from .image_processor import ImageProcessor

app/core/audio_processor.py

Lines changed: 148 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,98 +1,207 @@
1-
import os
2-
import gc
1+
"""Audio processing utilities for MLX OpenAI server."""
2+
3+
from __future__ import annotations
4+
35
import asyncio
4-
from typing import List
6+
import gc
7+
from pathlib import Path
8+
from typing import Any
9+
from urllib.parse import urlparse
10+
511
from .base_processor import BaseProcessor
612

713

814
class AudioProcessor(BaseProcessor):
915
"""Audio processor for handling audio files with caching and validation."""
10-
11-
def __init__(self, max_workers: int = 4, cache_size: int = 1000):
16+
17+
def __init__(self, max_workers: int = 4, cache_size: int = 1000) -> None:
18+
"""
19+
Initialize the AudioProcessor.
20+
21+
Parameters
22+
----------
23+
max_workers : int, optional
24+
Maximum number of worker threads for processing, by default 4.
25+
cache_size : int, optional
26+
Maximum number of cached files to keep, by default 1000.
27+
"""
1228
super().__init__(max_workers, cache_size)
1329
# Supported audio formats
14-
self._supported_formats = {'.mp3', '.wav'}
30+
self._supported_formats = {".mp3", ".wav", ".m4a", ".ogg", ".flac", ".aac"}
1531

16-
def _get_media_format(self, media_url: str, data: bytes = None) -> str:
17-
"""Determine audio format from URL or data."""
32+
def _get_media_format(self, media_url: str, _data: bytes | None = None) -> str:
33+
"""
34+
Determine audio format from URL or data.
35+
36+
Parameters
37+
----------
38+
media_url : str
39+
The URL or data URL of the audio file.
40+
_data : bytes or None, optional
41+
Audio data bytes, not used in this implementation.
42+
43+
Returns
44+
-------
45+
str
46+
The audio format (e.g., 'mp3', 'wav').
47+
"""
1848
if media_url.startswith("data:"):
1949
# Extract format from data URL
2050
mime_type = media_url.split(";")[0].split(":")[1]
2151
if "mp3" in mime_type or "mpeg" in mime_type:
2252
return "mp3"
23-
elif "wav" in mime_type:
53+
if "wav" in mime_type:
2454
return "wav"
25-
elif "m4a" in mime_type or "mp4" in mime_type:
55+
if "m4a" in mime_type or "mp4" in mime_type:
2656
return "m4a"
27-
elif "ogg" in mime_type:
57+
if "ogg" in mime_type:
2858
return "ogg"
29-
elif "flac" in mime_type:
59+
if "flac" in mime_type:
3060
return "flac"
31-
elif "aac" in mime_type:
61+
if "aac" in mime_type:
3262
return "aac"
3363
else:
3464
# Extract format from file extension
35-
ext = os.path.splitext(media_url.lower())[1]
65+
parsed = urlparse(media_url)
66+
if parsed.scheme:
67+
# It's a URL, get the path part
68+
path = parsed.path
69+
else:
70+
path = media_url
71+
ext = Path(path.lower()).suffix
3672
if ext in self._supported_formats:
3773
return ext[1:] # Remove the dot
38-
74+
3975
# Default to mp3 if format cannot be determined
4076
return "mp3"
4177

4278
def _validate_media_data(self, data: bytes) -> bool:
43-
"""Basic validation of audio data."""
79+
"""
80+
Validate basic audio data.
81+
82+
Parameters
83+
----------
84+
data : bytes
85+
The audio data to validate.
86+
87+
Returns
88+
-------
89+
bool
90+
True if the data appears to be valid audio, False otherwise.
91+
"""
4492
if len(data) < 100: # Too small to be a valid audio file
4593
return False
46-
94+
4795
# Check for common audio file signatures
4896
audio_signatures = [
49-
b'ID3', # MP3 with ID3 tag
50-
b'\xff\xfb', # MP3 frame header
51-
b'\xff\xf3', # MP3 frame header
52-
b'\xff\xf2', # MP3 frame header
53-
b'RIFF', # WAV/AVI
54-
b'OggS', # OGG
55-
b'fLaC', # FLAC
56-
b'\x00\x00\x00\x20ftypM4A', # M4A
97+
b"ID3", # MP3 with ID3 tag
98+
b"\xff\xfb", # MP3 frame header
99+
b"\xff\xf3", # MP3 frame header
100+
b"\xff\xf2", # MP3 frame header
101+
b"RIFF", # WAV/AVI
102+
b"OggS", # OGG
103+
b"fLaC", # FLAC
104+
b"\x00\x00\x00\x20ftypM4A", # M4A
57105
]
58-
106+
59107
for sig in audio_signatures:
60108
if data.startswith(sig):
61109
return True
62-
110+
63111
# Check for WAV format (RIFF header might be at different position)
64-
if b'WAVE' in data[:50]:
112+
if b"WAVE" in data[:50]:
65113
return True
66-
114+
67115
return True # Allow unknown formats to pass through
68116

69117
def _get_timeout(self) -> int:
70-
"""Get timeout for HTTP requests."""
118+
"""
119+
Get timeout for HTTP requests.
120+
121+
Returns
122+
-------
123+
int
124+
Timeout in seconds for audio file downloads.
125+
"""
71126
return 60 # Longer timeout for audio files
72127

73128
def _get_max_file_size(self) -> int:
74-
"""Get maximum file size in bytes."""
129+
"""
130+
Get maximum file size in bytes.
131+
132+
Returns
133+
-------
134+
int
135+
Maximum allowed file size for audio files in bytes.
136+
"""
75137
return 500 * 1024 * 1024 # 500 MB limit for audio
76138

77-
def _process_media_data(self, data: bytes, cached_path: str, **kwargs) -> str:
78-
"""Process audio data and save to cached path."""
79-
with open(cached_path, 'wb') as f:
139+
def _process_media_data(self, data: bytes, cached_path: str, **_kwargs: Any) -> str:
140+
"""
141+
Process audio data and save to cached path.
142+
143+
Parameters
144+
----------
145+
data : bytes
146+
The audio data to process.
147+
cached_path : str
148+
Path where the processed audio should be saved.
149+
**_kwargs : Any
150+
Additional keyword arguments (unused).
151+
152+
Returns
153+
-------
154+
str
155+
The path to the cached audio file.
156+
"""
157+
with Path(cached_path).open("wb") as f:
80158
f.write(data)
81159
self._cleanup_old_files()
82160
return cached_path
83161

84162
def _get_media_type_name(self) -> str:
85-
"""Get media type name for logging."""
163+
"""
164+
Get media type name for logging.
165+
166+
Returns
167+
-------
168+
str
169+
The string 'audio' for logging purposes.
170+
"""
86171
return "audio"
87172

88173
async def process_audio_url(self, audio_url: str) -> str:
89-
"""Process a single audio URL and return path to cached file."""
174+
"""
175+
Process a single audio URL and return path to cached file.
176+
177+
Parameters
178+
----------
179+
audio_url : str
180+
The URL of the audio file to process.
181+
182+
Returns
183+
-------
184+
str
185+
Path to the cached audio file.
186+
"""
90187
return await self._process_single_media(audio_url)
91188

92-
async def process_audio_urls(self, audio_urls: List[str]) -> List[str]:
93-
"""Process multiple audio URLs and return paths to cached files."""
189+
async def process_audio_urls(self, audio_urls: list[str]) -> list[str | BaseException]:
190+
"""
191+
Process multiple audio URLs and return a list containing either file path strings or BaseException instances for failed items.
192+
193+
Parameters
194+
----------
195+
audio_urls : list[str]
196+
List of audio URLs to process.
197+
198+
Returns
199+
-------
200+
list[str | BaseException]
201+
List where each element is either a path to a cached audio file (str) or a BaseException for failed processing.
202+
"""
94203
tasks = [self.process_audio_url(url) for url in audio_urls]
95204
results = await asyncio.gather(*tasks, return_exceptions=True)
96205
# Force garbage collection after batch processing
97206
gc.collect()
98-
return results
207+
return results

0 commit comments

Comments (0)