From ded15056e79e6ea1386ff715d1dd546d1b56ee11 Mon Sep 17 00:00:00 2001 From: dcaswell Date: Mon, 1 Dec 2025 17:11:14 -0700 Subject: [PATCH] feat: Add an interface for the ability to process speech-to-text asynchronously This adds the ability to send a transcription request to a provider that responds with an id and later sends a webhook carrying that id once the job is done; the id can then be used to retrieve the finished transcript. This only supplies the interface, which a provider can implement in the future. --- src/Audio/PendingRequest.php | 42 ++++++++++++++++- src/Audio/ProviderIdResponse.php | 17 +++++++ src/Audio/SpeechToTextAsyncRequest.php | 62 ++++++++++++++++++++++++++ src/Providers/Provider.php | 12 +++++ tests/Audio/PendingRequestTest.php | 52 +++++++++++++++++++++ tests/TestDoubles/TestProvider.php | 30 +++++++++++-- 6 files changed, 210 insertions(+), 5 deletions(-) create mode 100644 src/Audio/ProviderIdResponse.php create mode 100644 src/Audio/SpeechToTextAsyncRequest.php create mode 100644 tests/Audio/PendingRequestTest.php diff --git a/src/Audio/PendingRequest.php b/src/Audio/PendingRequest.php index a7b008223..773c196b0 100644 --- a/src/Audio/PendingRequest.php +++ b/src/Audio/PendingRequest.php @@ -19,11 +19,11 @@ class PendingRequest use ConfiguresProviders; use HasProviderOptions; - protected string|Audio $input; + protected string|Audio|int $input; protected string $voice; - public function withInput(string|Audio $input): self + public function withInput(string|Audio|int $input): self { $this->input = $input; @@ -59,6 +59,28 @@ public function asText(): TextResponse } } + public function asTextProviderId(): ProviderIdResponse + { + $request = $this->toSpeechToTextRequest(); + + try { + return $this->provider->speechToTextProviderId($request); + } catch (RequestException $e) { + $this->provider->handleRequestException($request->model(), $e); + } + } + + public function asTextAsync(): TextResponse + { + $request = $this->toSpeechToTextAsyncRequest(); +
+ try { + return $this->provider->speechToTextAsync($request); + } catch (RequestException $e) { + $this->provider->handleRequestException($request->model(), $e); + } + } + protected function toTextToSpeechRequest(): TextToSpeechRequest { if (! is_string($this->input)) { @@ -91,4 +113,20 @@ protected function toSpeechToTextRequest(): SpeechToTextRequest providerOptions: $this->providerOptions, ); } + + protected function toSpeechToTextAsyncRequest(): SpeechToTextAsyncRequest + { + if (! is_string($this->input) && ! is_int($this->input)) { + throw new InvalidArgumentException('Async speech-to-text requires the input be the Provider ID as a string or integer'); + } + + return new SpeechToTextAsyncRequest( + model: $this->model, + providerKey: $this->providerKey(), + input: $this->input, + clientOptions: $this->clientOptions, + clientRetry: $this->clientRetry, + providerOptions: $this->providerOptions, + ); + } } diff --git a/src/Audio/ProviderIdResponse.php b/src/Audio/ProviderIdResponse.php new file mode 100644 index 000000000..d9aac7d8a --- /dev/null +++ b/src/Audio/ProviderIdResponse.php @@ -0,0 +1,17 @@ + */ + public array $additionalContent = [] + ) {} +} diff --git a/src/Audio/SpeechToTextAsyncRequest.php b/src/Audio/SpeechToTextAsyncRequest.php new file mode 100644 index 000000000..bd35b2c04 --- /dev/null +++ b/src/Audio/SpeechToTextAsyncRequest.php @@ -0,0 +1,62 @@ + $clientOptions + * @param array{0: array|int, 1?: Closure|int, 2?: ?callable, 3?: bool} $clientRetry + * @param array $providerOptions + */ + public function __construct( + protected string $model, + protected string $providerKey, + protected string|int $input, + protected array $clientOptions, + protected array $clientRetry, + array $providerOptions = [], + ) { + $this->providerOptions = $providerOptions; + } + + /** + * @return array{0: array|int, 1?: Closure|int, 2?: ?callable, 3?: bool} + */ + public function clientRetry(): array + { + return $this->clientRetry; + } + + /** + * @return array 
+ */ + public function clientOptions(): array + { + return $this->clientOptions; + } + + public function input(): string|int + { + return $this->input; + } + + public function model(): string + { + return $this->model; + } + + public function provider(): string + { + return $this->providerKey; + } +} diff --git a/src/Providers/Provider.php b/src/Providers/Provider.php index 955721b5f..eb1885657 100644 --- a/src/Providers/Provider.php +++ b/src/Providers/Provider.php @@ -7,6 +7,8 @@ use Generator; use Illuminate\Http\Client\RequestException; use Prism\Prism\Audio\AudioResponse as TextToSpeechResponse; +use Prism\Prism\Audio\ProviderIdResponse; +use Prism\Prism\Audio\SpeechToTextAsyncRequest; use Prism\Prism\Audio\SpeechToTextRequest; use Prism\Prism\Audio\TextResponse as SpeechToTextResponse; use Prism\Prism\Audio\TextToSpeechRequest; @@ -56,6 +58,16 @@ public function speechToText(SpeechToTextRequest $request): SpeechToTextResponse throw PrismException::unsupportedProviderAction('speechToText', class_basename($this)); } + public function speechToTextProviderId(SpeechToTextRequest $request): ProviderIdResponse + { + throw PrismException::unsupportedProviderAction('speechToTextProviderId', class_basename($this)); + } + + public function speechToTextAsync(SpeechToTextAsyncRequest $request): SpeechToTextResponse + { + throw PrismException::unsupportedProviderAction('speechToTextAsync', class_basename($this)); + } + /** * @return Generator */ diff --git a/tests/Audio/PendingRequestTest.php b/tests/Audio/PendingRequestTest.php new file mode 100644 index 000000000..94295798c --- /dev/null +++ b/tests/Audio/PendingRequestTest.php @@ -0,0 +1,52 @@ +pendingRequest = new PendingRequest; +}); + +test('it generates a provider id response for speech to text', function (): void { + resolve('prism-manager')->extend('test-provider', fn ($config): ProviderContract => new TestProvider); + + $audio = Audio::fromUrl('https://example.com/audio.mp3', 'audio/mpeg'); + + $response = 
$this->pendingRequest + ->using('test-provider', 'test-model') + ->withInput($audio) + ->asTextProviderId(); + + $provider = $this->pendingRequest->provider(); + + expect($response) + ->toBeInstanceOf(ProviderIdResponse::class) + ->and($response->id)->toBe('provider-id') + ->and($provider->request)->toBeInstanceOf(SpeechToTextRequest::class) + ->and($provider->request->input())->toBe($audio); +}); + +test('it generates a response for async speech to text', function (): void { + resolve('prism-manager')->extend('test-provider', fn ($config): ProviderContract => new TestProvider); + + $providerId = 'provider-id-123'; + + $response = $this->pendingRequest + ->using('test-provider', 'test-model') + ->withInput($providerId) + ->asTextAsync(); + + $provider = $this->pendingRequest->provider(); + + expect($response->text)->toBe('Async transcript') + ->and($provider->request)->toBeInstanceOf(SpeechToTextAsyncRequest::class) + ->and($provider->request->model())->toBe('test-model') + ->and($provider->request->input())->toBe($providerId); +}); diff --git a/tests/TestDoubles/TestProvider.php b/tests/TestDoubles/TestProvider.php index 296b08503..c2380a232 100644 --- a/tests/TestDoubles/TestProvider.php +++ b/tests/TestDoubles/TestProvider.php @@ -5,6 +5,10 @@ namespace Tests\TestDoubles; use Generator; +use Prism\Prism\Audio\ProviderIdResponse; +use Prism\Prism\Audio\SpeechToTextAsyncRequest; +use Prism\Prism\Audio\SpeechToTextRequest; +use Prism\Prism\Audio\TextResponse as AudioTextResponse; use Prism\Prism\Embeddings\Request as EmbeddingRequest; use Prism\Prism\Embeddings\Response as EmbeddingResponse; use Prism\Prism\Enums\FinishReason; @@ -24,7 +28,7 @@ class TestProvider extends Provider { - public StructuredRequest|TextRequest|EmbeddingRequest|ImageRequest $request; + public StructuredRequest|TextRequest|EmbeddingRequest|ImageRequest|SpeechToTextRequest|SpeechToTextAsyncRequest $request; /** @var array */ public array $clientOptions; @@ -32,7 +36,7 @@ class TestProvider 
extends Provider /** @var array */ public array $clientRetry; - /** @var array */ + /** @var array */ public array $responses = []; public $callCount = 0; @@ -115,7 +119,27 @@ public function stream(TextRequest $request): Generator throw PrismException::unsupportedProviderAction(__METHOD__, class_basename($this)); } - public function withResponse(StructuredResponse|TextResponse $response): Provider + #[\Override] + public function speechToTextProviderId(SpeechToTextRequest $request): ProviderIdResponse + { + $this->callCount++; + + $this->request = $request; + + return $this->responses[$this->callCount - 1] ?? new ProviderIdResponse('provider-id'); + } + + #[\Override] + public function speechToTextAsync(SpeechToTextAsyncRequest $request): AudioTextResponse + { + $this->callCount++; + + $this->request = $request; + + return $this->responses[$this->callCount - 1] ?? new AudioTextResponse('Async transcript'); + } + + public function withResponse(StructuredResponse|TextResponse|EmbeddingResponse|ImageResponse|AudioTextResponse|ProviderIdResponse $response): Provider { $this->responses[] = $response;