Skip to content

Commit 07594e9

Browse files
committed
feature #858 [Platform][OpenAI] Add support for OpenAI text-to-speech (chr-hertel)
This PR was merged into the main branch.

Discussion
----------

[Platform][OpenAI] Add support for OpenAI text-to-speech

| Q             | A
| ------------- | ---
| Bug fix?      | no
| New feature?  | yes
| Docs?         | no
| Issues        |
| License       | MIT

Adding OpenAI TTS support: https://platform.openai.com/docs/guides/text-to-speech

Commits
-------

1ab425d Add support for OpenAI text-to-speech
2 parents b91d837 + 1ab425d commit 07594e9

File tree

11 files changed

+391
-0
lines changed

11 files changed

+391
-0
lines changed

examples/openai/audio-output.php

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
13+
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech\Voice;
14+
15+
require_once dirname(__DIR__).'/bootstrap.php';

// Create the OpenAI platform. env() and http_client() are not defined in this
// file; presumably they are provided by the bootstrap file required above.
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());

// The text to be spoken and the options forwarded with the request.
$text = 'Today is a wonderful day to build something people love!';
$options = [
    'voice' => Voice::CORAL,
    'instructions' => 'Speak in a cheerful and positive tone.',
];

$result = $platform->invoke('gpt-4o-mini-tts', $text, $options);

// Emit the binary result on standard output (e.g. to redirect it into a file).
echo $result->asBinary();

src/platform/src/Bridge/OpenAi/ModelCatalog.php

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,27 @@ public function __construct(array $additionalModels = [])
234234
'class' => Embeddings::class,
235235
'capabilities' => [Capability::INPUT_TEXT],
236236
],
237+
'tts-1' => [
238+
'class' => TextToSpeech::class,
239+
'capabilities' => [
240+
Capability::INPUT_TEXT,
241+
Capability::OUTPUT_AUDIO,
242+
],
243+
],
244+
'tts-1-hd' => [
245+
'class' => TextToSpeech::class,
246+
'capabilities' => [
247+
Capability::INPUT_TEXT,
248+
Capability::OUTPUT_AUDIO,
249+
],
250+
],
251+
'gpt-4o-mini-tts' => [
252+
'class' => TextToSpeech::class,
253+
'capabilities' => [
254+
Capability::INPUT_TEXT,
255+
Capability::OUTPUT_AUDIO,
256+
],
257+
],
237258
'whisper-1' => [
238259
'class' => Whisper::class,
239260
'capabilities' => [

src/platform/src/Bridge/OpenAi/PlatformFactory.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,14 @@ public static function create(
4242
new Gpt\ModelClient($httpClient, $apiKey, $region),
4343
new Embeddings\ModelClient($httpClient, $apiKey, $region),
4444
new DallE\ModelClient($httpClient, $apiKey, $region),
45+
new TextToSpeech\ModelClient($httpClient, $apiKey, $region),
4546
new Whisper\ModelClient($httpClient, $apiKey, $region),
4647
],
4748
[
4849
new Gpt\ResultConverter(),
4950
new Embeddings\ResultConverter(),
5051
new DallE\ResultConverter(),
52+
new TextToSpeech\ResultConverter(),
5153
new Whisper\ResultConverter(),
5254
],
5355
$modelCatalog,
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Bridge\OpenAi;
13+
14+
use Symfony\AI\Platform\Model;
15+
16+
/**
 * Model type for OpenAI text-to-speech models.
 *
 * Adds nothing to the base Model; the class itself is what the text-to-speech
 * model client and result converter check with `instanceof`.
 *
 * @author Christopher Hertel <mail@christopher-hertel.de>
 */
class TextToSpeech extends Model
{
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
13+
14+
/**
 * String identifiers of audio formats for text-to-speech output.
 *
 * NOTE(review): nothing in this change references these constants; presumably
 * they are meant as values for the "response_format" request option - confirm
 * against the OpenAI API reference.
 *
 * @author Christopher Hertel <mail@christopher-hertel.de>
 */
interface Format
{
    // Compressed formats.
    public const MP3 = 'mp3';
    public const OPUS = 'opus';
    public const AAC = 'aac';
    public const FLAC = 'flac';

    // Waveform / raw sample formats.
    public const WAV = 'wav';
    public const PCM = 'pcm';
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
13+
14+
use Symfony\AI\Platform\Bridge\OpenAi\AbstractModelClient;
15+
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
16+
use Symfony\AI\Platform\Exception\InvalidArgumentException;
17+
use Symfony\AI\Platform\Model;
18+
use Symfony\AI\Platform\ModelClientInterface;
19+
use Symfony\AI\Platform\Result\RawHttpResult;
20+
use Symfony\Contracts\HttpClient\HttpClientInterface;
21+
22+
/**
 * Model client that sends text-to-speech requests to the OpenAI
 * "/v1/audio/speech" endpoint.
 *
 * @author Christopher Hertel <mail@christopher-hertel.de>
 */
final class ModelClient extends AbstractModelClient implements ModelClientInterface
{
    /**
     * @param HttpClientInterface $httpClient HTTP client used to send the request
     * @param string              $apiKey     API key sent as bearer token; passed to validateApiKey() on construction
     * @param string|null         $region     optional region handed to getBaseUrl() when the endpoint URL is built
     */
    public function __construct(
        private readonly HttpClientInterface $httpClient,
        #[\SensitiveParameter] private readonly string $apiKey,
        private readonly ?string $region = null,
    ) {
        self::validateApiKey($apiKey);
    }

    /**
     * Only models of type TextToSpeech are handled by this client.
     */
    public function supports(Model $model): bool
    {
        return $model instanceof TextToSpeech;
    }

    /**
     * Posts the payload as "input" together with the model name and all given
     * options as a JSON body.
     *
     * @param array|string         $payload text to convert; sent as-is under the "input" key
     * @param array<string, mixed> $options request options; "voice" is mandatory, "stream" and "stream_format" are rejected
     *
     * @throws InvalidArgumentException when "voice" is missing or a streaming option is set
     */
    public function request(Model $model, array|string $payload, array $options = []): RawHttpResult
    {
        // A null voice counts as missing, exactly like an absent key.
        if (null === ($options['voice'] ?? null)) {
            throw new InvalidArgumentException('The "voice" option is required for TextToSpeech requests.');
        }

        // Any non-null streaming option is refused, whatever its value.
        if (null !== ($options['stream_format'] ?? $options['stream'] ?? null)) {
            throw new InvalidArgumentException('Streaming text to speech results is not supported yet.');
        }

        $endpoint = \sprintf('%s/v1/audio/speech', self::getBaseUrl($this->region));

        $requestOptions = [
            'auth_bearer' => $this->apiKey,
            'headers' => ['Content-Type' => 'application/json'],
            // "model" and "input" are merged last so caller options cannot override them.
            'json' => array_merge($options, ['model' => $model->getName(), 'input' => $payload]),
        ];

        $response = $this->httpClient->request('POST', $endpoint, $requestOptions);

        return new RawHttpResult($response);
    }
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
13+
14+
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
15+
use Symfony\AI\Platform\Exception\RuntimeException;
16+
use Symfony\AI\Platform\Model;
17+
use Symfony\AI\Platform\Result\BinaryResult;
18+
use Symfony\AI\Platform\Result\RawHttpResult;
19+
use Symfony\AI\Platform\Result\RawResultInterface;
20+
use Symfony\AI\Platform\Result\ResultInterface;
21+
use Symfony\AI\Platform\ResultConverterInterface as BaseResponseConverter;
22+
23+
/**
 * Converts the raw HTTP result of an OpenAI text-to-speech request into a
 * BinaryResult holding the response body.
 *
 * @author Christopher Hertel <mail@christopher-hertel.de>
 */
final class ResultConverter implements BaseResponseConverter
{
    /**
     * Only results of TextToSpeech models are handled by this converter.
     */
    public function supports(Model $model): bool
    {
        return $model instanceof TextToSpeech;
    }

    /**
     * Wraps the response body in a BinaryResult.
     *
     * @param array<string, mixed> $options not used by this converter
     *
     * @throws RuntimeException when the response status code is anything other than 200
     */
    public function convert(RawResultInterface|RawHttpResult $result, array $options = []): ResultInterface
    {
        $response = $result->getObject();

        if (200 !== $response->getStatusCode()) {
            // getContent(false): read the error body without the HTTP client
            // throwing its own exception, so it can be put into the message.
            throw new RuntimeException(\sprintf('The OpenAI Text-to-Speech API returned an error: "%s"', $response->getContent(false)));
        }

        // Reuse the response that was just checked instead of fetching it from
        // the result a second time.
        return new BinaryResult($response->getContent());
    }
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
13+
14+
/**
 * String identifiers of voices that can be passed as the "voice" option of a
 * text-to-speech request.
 *
 * @author Christopher Hertel <mail@christopher-hertel.de>
 */
interface Voice
{
    public const ALLOY = 'alloy';
    public const ASH = 'ash';
    public const BALLAD = 'ballad';
    public const CORAL = 'coral';
    public const ECHO = 'echo';
    public const FABLE = 'fable';

    public const NOVA = 'nova';
    public const ONYX = 'onyx';
    public const SAGE = 'sage';
    public const SHIMMER = 'shimmer';
    public const VERSE = 'verse';
}

src/platform/tests/Bridge/OpenAi/ModelCatalogTest.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
use Symfony\AI\Platform\Bridge\OpenAi\Embeddings;
1616
use Symfony\AI\Platform\Bridge\OpenAi\Gpt;
1717
use Symfony\AI\Platform\Bridge\OpenAi\ModelCatalog;
18+
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
1819
use Symfony\AI\Platform\Bridge\OpenAi\Whisper;
1920
use Symfony\AI\Platform\Capability;
2021
use Symfony\AI\Platform\ModelCatalog\ModelCatalogInterface;
@@ -53,6 +54,11 @@ public static function modelsProvider(): iterable
5354
yield 'text-embedding-3-large' => ['text-embedding-3-large', Embeddings::class, [Capability::INPUT_TEXT]];
5455
yield 'text-embedding-3-small' => ['text-embedding-3-small', Embeddings::class, [Capability::INPUT_TEXT]];
5556

57+
// Text-to-speech models
58+
yield 'tts-1' => ['tts-1', TextToSpeech::class, [Capability::INPUT_TEXT, Capability::OUTPUT_AUDIO]];
59+
yield 'tts-1-hd' => ['tts-1-hd', TextToSpeech::class, [Capability::INPUT_TEXT, Capability::OUTPUT_AUDIO]];
60+
yield 'gpt-4o-mini-tts' => ['gpt-4o-mini-tts', TextToSpeech::class, [Capability::INPUT_TEXT, Capability::OUTPUT_AUDIO]];
61+
5662
// Whisper models
5763
yield 'whisper-1' => ['whisper-1', Whisper::class, [Capability::INPUT_AUDIO, Capability::OUTPUT_TEXT]];
5864

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Platform\Tests\Bridge\OpenAi\TextToSpeech;
13+
14+
use PHPUnit\Framework\TestCase;
15+
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech;
16+
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech\ModelClient;
17+
use Symfony\AI\Platform\Bridge\OpenAi\TextToSpeech\ResultConverter;
18+
use Symfony\AI\Platform\Exception\InvalidArgumentException;
19+
use Symfony\AI\Platform\Model;
20+
use Symfony\Component\HttpClient\MockHttpClient;
21+
use Symfony\Component\HttpClient\Response\MockResponse;
22+
use Symfony\Contracts\HttpClient\ResponseInterface as HttpResponse;
23+
24+
/**
 * Unit tests for the OpenAI text-to-speech ModelClient.
 *
 * @author Christopher Hertel <mail@christopher-hertel.de>
 */
final class ModelClientTest extends TestCase
{
    public function testSupportsTextToSpeechModel()
    {
        // Test the class this test case is named after, not the ResultConverter.
        $modelClient = new ModelClient(new MockHttpClient(), 'sk-api-key');
        $model = new TextToSpeech('tts-1');

        $this->assertTrue($modelClient->supports($model));
    }

    public function testDoesntSupportOtherModels()
    {
        $modelClient = new ModelClient(new MockHttpClient(), 'sk-api-key');
        $model = new Model('test-model');

        $this->assertFalse($modelClient->supports($model));
    }

    public function testHappyCase()
    {
        // The callback stands in for the API and checks the outgoing request.
        $resultCallback = static function (string $method, string $url, array $options): HttpResponse {
            self::assertSame('POST', $method);
            self::assertSame('https://api.openai.com/v1/audio/speech', $url);
            self::assertSame('Authorization: Bearer sk-api-key', $options['normalized_headers']['authorization'][0]);
            // Options come first, then "model" and "input" as merged by the client.
            $expectedBody = '{"voice":"alloy","instructions":"Speak like a pirate","model":"tts-1","input":"Hello World!"}';
            self::assertSame($expectedBody, $options['body']);

            return new MockResponse();
        };
        $httpClient = new MockHttpClient([$resultCallback]);
        $modelClient = new ModelClient($httpClient, 'sk-api-key');
        $modelClient->request(new TextToSpeech('tts-1'), 'Hello World!', [
            'voice' => 'alloy',
            'instructions' => 'Speak like a pirate',
        ]);

        // Guard against the callback (and with it every assertion above) being skipped.
        $this->assertSame(1, $httpClient->getRequestsCount());
    }

    public function testFailsWithoutVoiceOption()
    {
        $this->expectException(InvalidArgumentException::class);
        $this->expectExceptionMessage('The "voice" option is required for TextToSpeech requests.');

        $httpClient = new MockHttpClient();
        $modelClient = new ModelClient($httpClient, 'sk-api-key');
        $modelClient->request(new TextToSpeech('tts-1'), 'Hello World!', [
            'instructions' => 'Speak like a pirate',
        ]);
    }

    public function testFailsWithStreamingOptions()
    {
        $this->expectException(InvalidArgumentException::class);
        $this->expectExceptionMessage('Streaming text to speech results is not supported yet.');

        $httpClient = new MockHttpClient();
        $modelClient = new ModelClient($httpClient, 'sk-api-key');
        $modelClient->request(new TextToSpeech('tts-1'), 'Hello World!', [
            'voice' => 'alloy',
            'stream' => true,
        ]);
    }
}

0 commit comments

Comments
 (0)