Files
go-openai/audio.go
GargantuaX be253c2d63 change azure engine config to modelMapper (#306)
* change azure engine config to azure modelMapper config

* Update go.mod

* Revert "Update go.mod"

This reverts commit 78d14c58f2a9ce668da43f6adbe20b60afcfe0d7.

* lint fix

* add test

* lint fix

* lint fix

* lint fix

* opt

* opt

* opt

* opt
2023-05-11 01:30:24 +04:00

147 lines
3.9 KiB
Go

package openai
import (
"bytes"
"context"
"fmt"
"net/http"
"os"
)
// Whisper Defines the models provided by OpenAI to use when processing audio with OpenAI.
const (
Whisper1 = "whisper-1"
)
// Response formats; Whisper uses AudioResponseFormatJSON by default.
type AudioResponseFormat string
const (
AudioResponseFormatJSON AudioResponseFormat = "json"
AudioResponseFormatSRT AudioResponseFormat = "srt"
AudioResponseFormatVTT AudioResponseFormat = "vtt"
)
// AudioRequest represents a request structure for audio API.
// ResponseFormat is not supported for now. We only return JSON text, which may be sufficient.
type AudioRequest struct {
Model string
FilePath string
Prompt string // For translation, it should be in English
Temperature float32
Language string // For translation, just do not use it. It seems "en" works, not confirmed...
Format AudioResponseFormat
}
// AudioResponse represents a response structure for audio API.
type AudioResponse struct {
Text string `json:"text"`
}
// CreateTranscription — API call to create a transcription. Returns transcribed text.
func (c *Client) CreateTranscription(
ctx context.Context,
request AudioRequest,
) (response AudioResponse, err error) {
return c.callAudioAPI(ctx, request, "transcriptions")
}
// CreateTranslation — API call to translate audio into English.
func (c *Client) CreateTranslation(
ctx context.Context,
request AudioRequest,
) (response AudioResponse, err error) {
return c.callAudioAPI(ctx, request, "translations")
}
// callAudioAPI — API call to an audio endpoint.
func (c *Client) callAudioAPI(
ctx context.Context,
request AudioRequest,
endpointSuffix string,
) (response AudioResponse, err error) {
var formBody bytes.Buffer
builder := c.createFormBuilder(&formBody)
if err = audioMultipartForm(request, builder); err != nil {
return AudioResponse{}, err
}
urlSuffix := fmt.Sprintf("/audio/%s", endpointSuffix)
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.fullURL(urlSuffix, request.Model), &formBody)
if err != nil {
return AudioResponse{}, err
}
req.Header.Add("Content-Type", builder.formDataContentType())
if request.HasJSONResponse() {
err = c.sendRequest(req, &response)
} else {
err = c.sendRequest(req, &response.Text)
}
if err != nil {
return AudioResponse{}, err
}
return
}
// HasJSONResponse returns true if the response format is JSON.
func (r AudioRequest) HasJSONResponse() bool {
return r.Format == "" || r.Format == AudioResponseFormatJSON
}
// audioMultipartForm creates a form with audio file contents and the name of the model to use for
// audio processing.
func audioMultipartForm(request AudioRequest, b formBuilder) error {
f, err := os.Open(request.FilePath)
if err != nil {
return fmt.Errorf("opening audio file: %w", err)
}
defer f.Close()
err = b.createFormFile("file", f)
if err != nil {
return fmt.Errorf("creating form file: %w", err)
}
err = b.writeField("model", request.Model)
if err != nil {
return fmt.Errorf("writing model name: %w", err)
}
// Create a form field for the prompt (if provided)
if request.Prompt != "" {
err = b.writeField("prompt", request.Prompt)
if err != nil {
return fmt.Errorf("writing prompt: %w", err)
}
}
// Create a form field for the format (if provided)
if request.Format != "" {
err = b.writeField("response_format", string(request.Format))
if err != nil {
return fmt.Errorf("writing format: %w", err)
}
}
// Create a form field for the temperature (if provided)
if request.Temperature != 0 {
err = b.writeField("temperature", fmt.Sprintf("%.2f", request.Temperature))
if err != nil {
return fmt.Errorf("writing temperature: %w", err)
}
}
// Create a form field for the language (if provided)
if request.Language != "" {
err = b.writeField("language", request.Language)
if err != nil {
return fmt.Errorf("writing language: %w", err)
}
}
// Close the multipart writer
return b.close()
}