Add support for multi-part chat messages (and gpt-4-vision-preview model) (#580)

* Add support for multi-part chat messages

OpenAI has recently introduced a new model called gpt-4-vision-preview, which supports images as input. For this model the chat completion endpoint accepts multi-part chat messages, where the content can be an array of structs in addition to the usual string format. This commit introduces new structures and constants to represent the different types of content parts and implements the json.Marshaler and json.Unmarshaler interfaces on ChatCompletionMessage.

* Add ImageURLDetail and ChatMessagePartType types

* Optimize ChatCompletionMessage deserialization

* Add ErrContentFieldsMisused error
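As a usage illustration (not part of the diff below), here is a minimal sketch of a vision request built with the new MultiContent field. It assumes the library's usual import path github.com/sashabaranov/go-openai; the API key environment variable and the image URL are placeholders, and the model name is written as a literal string since no model constant is added in the files shown on this page.

// Sketch only: sends one user message that mixes a text part and an image URL part.
package main

import (
	"context"
	"fmt"
	"os"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	client := openai.NewClient(os.Getenv("OPENAI_API_KEY"))

	resp, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		// Model name as a literal string; a library constant may be available instead.
		Model:     "gpt-4-vision-preview",
		MaxTokens: 300,
		Messages: []openai.ChatCompletionMessage{
			{
				Role: openai.ChatMessageRoleUser,
				// Use MultiContent instead of Content; setting both fails with ErrContentFieldsMisused.
				MultiContent: []openai.ChatMessagePart{
					{Type: openai.ChatMessagePartTypeText, Text: "What is in this image?"},
					{
						Type: openai.ChatMessagePartTypeImageURL,
						ImageURL: &openai.ChatMessageImageURL{
							URL:    "https://example.com/cat.png", // placeholder URL
							Detail: openai.ImageURLDetailLow,
						},
					},
				},
			},
		},
	})
	if err != nil {
		fmt.Println("chat completion error:", err)
		return
	}
	fmt.Println(resp.Choices[0].Message.Content)
}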
chat.go (91 changed lines)

@@ -2,6 +2,7 @@ package openai
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"net/http"
 )

@@ -20,6 +21,7 @@ const chatCompletionsSuffix = "/chat/completions"
 var (
 	ErrChatCompletionInvalidModel       = errors.New("this model is not supported with this method, please use CreateCompletion client method instead") //nolint:lll
 	ErrChatCompletionStreamNotSupported = errors.New("streaming is not supported with this method, please use CreateChatCompletionStream") //nolint:lll
+	ErrContentFieldsMisused             = errors.New("can't use both Content and MultiContent properties simultaneously")
 )

 type Hate struct {

@@ -51,9 +53,36 @@ type PromptAnnotation struct {
 	ContentFilterResults ContentFilterResults `json:"content_filter_results,omitempty"`
 }

+type ImageURLDetail string
+
+const (
+	ImageURLDetailHigh ImageURLDetail = "high"
+	ImageURLDetailLow  ImageURLDetail = "low"
+	ImageURLDetailAuto ImageURLDetail = "auto"
+)
+
+type ChatMessageImageURL struct {
+	URL    string         `json:"url,omitempty"`
+	Detail ImageURLDetail `json:"detail,omitempty"`
+}
+
+type ChatMessagePartType string
+
+const (
+	ChatMessagePartTypeText     ChatMessagePartType = "text"
+	ChatMessagePartTypeImageURL ChatMessagePartType = "image_url"
+)
+
+type ChatMessagePart struct {
+	Type     ChatMessagePartType  `json:"type,omitempty"`
+	Text     string               `json:"text,omitempty"`
+	ImageURL *ChatMessageImageURL `json:"image_url,omitempty"`
+}
+
 type ChatCompletionMessage struct {
 	Role         string `json:"role"`
 	Content      string `json:"content"`
+	MultiContent []ChatMessagePart

 	// This property isn't in the official documentation, but it's in
 	// the documentation for the official library for python:

@@ -70,6 +99,64 @@ type ChatCompletionMessage struct {
 	ToolCallID string `json:"tool_call_id,omitempty"`
 }

+func (m ChatCompletionMessage) MarshalJSON() ([]byte, error) {
+	if m.Content != "" && m.MultiContent != nil {
+		return nil, ErrContentFieldsMisused
+	}
+	if len(m.MultiContent) > 0 {
+		msg := struct {
+			Role         string            `json:"role"`
+			Content      string            `json:"-"`
+			MultiContent []ChatMessagePart `json:"content,omitempty"`
+			Name         string            `json:"name,omitempty"`
+			FunctionCall *FunctionCall     `json:"function_call,omitempty"`
+			ToolCalls    []ToolCall        `json:"tool_calls,omitempty"`
+			ToolCallID   string            `json:"tool_call_id,omitempty"`
+		}(m)
+		return json.Marshal(msg)
+	}
+	msg := struct {
+		Role         string            `json:"role"`
+		Content      string            `json:"content"`
+		MultiContent []ChatMessagePart `json:"-"`
+		Name         string            `json:"name,omitempty"`
+		FunctionCall *FunctionCall     `json:"function_call,omitempty"`
+		ToolCalls    []ToolCall        `json:"tool_calls,omitempty"`
+		ToolCallID   string            `json:"tool_call_id,omitempty"`
+	}(m)
+	return json.Marshal(msg)
+}
+
+func (m *ChatCompletionMessage) UnmarshalJSON(bs []byte) error {
+	msg := struct {
+		Role         string `json:"role"`
+		Content      string `json:"content"`
+		MultiContent []ChatMessagePart
+		Name         string        `json:"name,omitempty"`
+		FunctionCall *FunctionCall `json:"function_call,omitempty"`
+		ToolCalls    []ToolCall    `json:"tool_calls,omitempty"`
+		ToolCallID   string        `json:"tool_call_id,omitempty"`
+	}{}
+	if err := json.Unmarshal(bs, &msg); err == nil {
+		*m = ChatCompletionMessage(msg)
+		return nil
+	}
+	multiMsg := struct {
+		Role         string `json:"role"`
+		Content      string
+		MultiContent []ChatMessagePart `json:"content"`
+		Name         string        `json:"name,omitempty"`
+		FunctionCall *FunctionCall `json:"function_call,omitempty"`
+		ToolCalls    []ToolCall    `json:"tool_calls,omitempty"`
+		ToolCallID   string        `json:"tool_call_id,omitempty"`
+	}{}
+	if err := json.Unmarshal(bs, &multiMsg); err != nil {
+		return err
+	}
+	*m = ChatCompletionMessage(multiMsg)
+	return nil
+}
+
 type ToolCall struct {
 	// Index is not nil only in chat completion chunk object
 	Index *int `json:"index,omitempty"`
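To make the custom marshaling above concrete, here is a small sketch (not part of the commit) of the wire format it produces and of the fallback path UnmarshalJSON takes. The expected output is noted in comments; the import path github.com/sashabaranov/go-openai and the image URL are assumptions for illustration.

// Sketch only: shows that MultiContent serializes under the "content" key as an array,
// while a plain message keeps "content" as a string, and that both shapes decode back.
package main

import (
	"encoding/json"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	plain := openai.ChatCompletionMessage{Role: "user", Content: "Hello!"}
	multi := openai.ChatCompletionMessage{
		Role: "user",
		MultiContent: []openai.ChatMessagePart{
			{Type: openai.ChatMessagePartTypeText, Text: "Hello!"},
			{
				Type:     openai.ChatMessagePartTypeImageURL,
				ImageURL: &openai.ChatMessageImageURL{URL: "https://example.com/cat.png"},
			},
		},
	}

	p, _ := json.Marshal(plain)
	m, _ := json.Marshal(multi)
	fmt.Println(string(p)) // {"role":"user","content":"Hello!"}
	fmt.Println(string(m)) // {"role":"user","content":[{"type":"text","text":"Hello!"},{"type":"image_url","image_url":{"url":"https://example.com/cat.png"}}]}

	// UnmarshalJSON first tries the plain string form of "content" and falls back
	// to the multi-part form, so both shapes decode into ChatCompletionMessage.
	var decoded openai.ChatCompletionMessage
	_ = json.Unmarshal(m, &decoded)
	fmt.Println(len(decoded.MultiContent)) // 2
}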
chat_test.go (103 changed lines)

@@ -3,6 +3,7 @@ package openai_test
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"

@@ -296,6 +297,108 @@ func TestAzureChatCompletions(t *testing.T) {
 	checks.NoError(t, err, "CreateAzureChatCompletion error")
 }

+func TestMultipartChatCompletions(t *testing.T) {
+	client, server, teardown := setupAzureTestServer()
+	defer teardown()
+	server.RegisterHandler("/openai/deployments/*", handleChatCompletionEndpoint)
+
+	_, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
+		MaxTokens: 5,
+		Model:     openai.GPT3Dot5Turbo,
+		Messages: []openai.ChatCompletionMessage{
+			{
+				Role: openai.ChatMessageRoleUser,
+				MultiContent: []openai.ChatMessagePart{
+					{
+						Type: openai.ChatMessagePartTypeText,
+						Text: "Hello!",
+					},
+					{
+						Type: openai.ChatMessagePartTypeImageURL,
+						ImageURL: &openai.ChatMessageImageURL{
+							URL:    "URL",
+							Detail: openai.ImageURLDetailLow,
+						},
+					},
+				},
+			},
+		},
+	})
+	checks.NoError(t, err, "CreateAzureChatCompletion error")
+}
+
+func TestMultipartChatMessageSerialization(t *testing.T) {
+	jsonText := `[{"role":"system","content":"system-message"},` +
+		`{"role":"user","content":[{"type":"text","text":"nice-text"},` +
+		`{"type":"image_url","image_url":{"url":"URL","detail":"high"}}]}]`
+
+	var msgs []openai.ChatCompletionMessage
+	err := json.Unmarshal([]byte(jsonText), &msgs)
+	if err != nil {
+		t.Fatalf("Expected no error: %s", err)
+	}
+	if len(msgs) != 2 {
+		t.Errorf("unexpected number of messages")
+	}
+	if msgs[0].Role != "system" || msgs[0].Content != "system-message" || msgs[0].MultiContent != nil {
+		t.Errorf("invalid user message: %v", msgs[0])
+	}
+	if msgs[1].Role != "user" || msgs[1].Content != "" || len(msgs[1].MultiContent) != 2 {
+		t.Errorf("invalid user message")
+	}
+	parts := msgs[1].MultiContent
+	if parts[0].Type != "text" || parts[0].Text != "nice-text" {
+		t.Errorf("invalid text part: %v", parts[0])
+	}
+	if parts[1].Type != "image_url" || parts[1].ImageURL.URL != "URL" || parts[1].ImageURL.Detail != "high" {
+		t.Errorf("invalid image_url part")
+	}
+
+	s, err := json.Marshal(msgs)
+	if err != nil {
+		t.Fatalf("Expected no error: %s", err)
+	}
+	res := strings.ReplaceAll(string(s), " ", "")
+	if res != jsonText {
+		t.Fatalf("invalid message: %s", string(s))
+	}
+
+	invalidMsg := []openai.ChatCompletionMessage{
+		{
+			Role:    "user",
+			Content: "some-text",
+			MultiContent: []openai.ChatMessagePart{
+				{
+					Type: "text",
+					Text: "nice-text",
+				},
+			},
+		},
+	}
+	_, err = json.Marshal(invalidMsg)
+	if !errors.Is(err, openai.ErrContentFieldsMisused) {
+		t.Fatalf("Expected error: %s", err)
+	}
+
+	err = json.Unmarshal([]byte(`["not-a-message"]`), &msgs)
+	if err == nil {
+		t.Fatalf("Expected error")
+	}
+
+	emptyMultiContentMsg := openai.ChatCompletionMessage{
+		Role:         "user",
+		MultiContent: []openai.ChatMessagePart{},
+	}
+	s, err = json.Marshal(emptyMultiContentMsg)
+	if err != nil {
+		t.Fatalf("Unexpected error")
+	}
+	res = strings.ReplaceAll(string(s), " ", "")
+	if res != `{"role":"user","content":""}` {
+		t.Fatalf("invalid message: %s", string(s))
+	}
+}
+
 // handleChatCompletionEndpoint Handles the ChatGPT completion endpoint by the test server.
 func handleChatCompletionEndpoint(w http.ResponseWriter, r *http.Request) {
 	var err error