diff --git a/Makefile b/Makefile index 7a129674..b2686912 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,5 @@ +.DEFAULT_GOAL := build + all: build-ui build build-ui: diff --git a/pkg/builtin/defaults.go b/pkg/builtin/defaults.go index 04410075..6b31ca90 100644 --- a/pkg/builtin/defaults.go +++ b/pkg/builtin/defaults.go @@ -6,17 +6,12 @@ import ( ) var ( - DefaultModel = openai.DefaultModel - DefaultVisionModel = openai.DefaultVisionModel + DefaultModel = openai.DefaultModel ) func SetDefaults(tool types.Tool) types.Tool { if tool.Parameters.ModelName == "" { - if tool.Parameters.Vision { - tool.Parameters.ModelName = DefaultVisionModel - } else { - tool.Parameters.ModelName = DefaultModel - } + tool.Parameters.ModelName = DefaultModel } return tool } diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 512a9c47..3a5a37d9 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -173,7 +173,6 @@ func (e *Engine) Start(ctx Context, input string) (*Return, error) { completion := types.CompletionRequest{ Model: tool.Parameters.ModelName, - Vision: tool.Parameters.Vision, MaxToken: tool.Parameters.MaxTokens, JSONResponse: tool.Parameters.JSONResponse, Cache: tool.Parameters.Cache, diff --git a/pkg/openai/client.go b/pkg/openai/client.go index a8b08076..00d528e7 100644 --- a/pkg/openai/client.go +++ b/pkg/openai/client.go @@ -16,12 +16,10 @@ import ( "github.com/gptscript-ai/gptscript/pkg/cache" "github.com/gptscript-ai/gptscript/pkg/hash" "github.com/gptscript-ai/gptscript/pkg/types" - "github.com/gptscript-ai/gptscript/pkg/vision" "github.com/sashabaranov/go-openai" ) const ( - DefaultVisionModel = openai.GPT4VisionPreview DefaultModel = openai.GPT4TurboPreview DefaultPromptParameter = "defaultPromptParameter" ) @@ -171,15 +169,8 @@ func toToolCall(call types.CompletionToolCall) openai.ToolCall { } } -func toMessages(cache *cache.Client, request types.CompletionRequest) (result []openai.ChatCompletionMessage, err error) { +func toMessages(request types.CompletionRequest) (result []openai.ChatCompletionMessage, err error) { for _, message := range request.Messages { - if request.Vision { - message, err = vision.ToVisionMessage(cache, message) - if err != nil { - return nil, err - } - } - chatMessage := openai.ChatCompletionMessage{ Role: string(message.Role), } @@ -192,25 +183,6 @@ func toMessages(cache *cache.Client, request types.CompletionRequest) (result [] if content.ToolCall != nil { chatMessage.ToolCalls = append(chatMessage.ToolCalls, toToolCall(*content.ToolCall)) } - if content.Image != nil { - url, err := vision.ImageToURL(cache, request.Vision, *content.Image) - if err != nil { - return nil, err - } - if request.Vision { - chatMessage.MultiContent = append(chatMessage.MultiContent, openai.ChatMessagePart{ - Type: openai.ChatMessagePartTypeImageURL, - ImageURL: &openai.ChatMessageImageURL{ - URL: url, - }, - }) - } else { - chatMessage.MultiContent = append(chatMessage.MultiContent, openai.ChatMessagePart{ - Type: openai.ChatMessagePartTypeText, - Text: fmt.Sprintf("Image URL %s", url), - }) - } - } if content.Text != "" { chatMessage.MultiContent = append(chatMessage.MultiContent, openai.ChatMessagePart{ Type: openai.ChatMessagePartTypeText, @@ -251,7 +223,7 @@ type Status struct { } func (c *Client) Call(ctx context.Context, messageRequest types.CompletionRequest, status chan<- Status) (*types.CompletionMessage, error) { - msgs, err := toMessages(c.cache, messageRequest) + msgs, err := toMessages(messageRequest) if err != nil { return nil, err } @@ -277,21 +249,19 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques } } - if !messageRequest.Vision { - for _, tool := range messageRequest.Tools { - params := tool.Function.Parameters - if params != nil && params.Type == "object" && params.Properties == nil { - params.Properties = map[string]types.Property{} - } - request.Tools = append(request.Tools, openai.Tool{ - Type: openai.ToolType(tool.Type), - Function: openai.FunctionDefinition{ - Name: tool.Function.Name, - Description: tool.Function.Description, - Parameters: params, - }, - }) + for _, tool := range messageRequest.Tools { + params := tool.Function.Parameters + if params != nil && params.Type == "object" && params.Properties == nil { + params.Properties = map[string]types.Property{} } + request.Tools = append(request.Tools, openai.Tool{ + Type: openai.ToolType(tool.Type), + Function: openai.FunctionDefinition{ + Name: tool.Function.Name, + Description: tool.Function.Description, + Parameters: params, + }, + }) } id := fmt.Sprint(atomic.AddInt64(&completionID, 1)) @@ -368,7 +338,7 @@ func appendMessage(msg types.CompletionMessage, response openai.ChatCompletionSt if delta.Content != "" { found := false for i, content := range msg.Content { - if content.ToolCall != nil || content.Image != nil { + if content.ToolCall != nil { continue } msg.Content[i] = types.ContentPart{ diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go index 15c84bb5..a8274204 100644 --- a/pkg/parser/parser.go +++ b/pkg/parser/parser.go @@ -98,11 +98,6 @@ func isParam(line string, tool *types.Tool) (_ bool, err error) { if err := addArg(value, tool); err != nil { return false, err } - case "vision": - tool.Parameters.Vision, err = toBool(value) - if err != nil { - return false, err - } case "maxtoken": fallthrough case "maxtokens": diff --git a/pkg/types/completion.go b/pkg/types/completion.go index 26375ca4..90f6db08 100644 --- a/pkg/types/completion.go +++ b/pkg/types/completion.go @@ -13,7 +13,6 @@ type CompletionToolType string type CompletionRequest struct { Model string - Vision bool Tools []CompletionTool Messages []CompletionMessage MaxToken int @@ -77,17 +76,6 @@ func (in CompletionMessage) String() string { if content.ToolCall != nil { buf.WriteString(fmt.Sprintf("tool call %s -> %s", content.ToolCall.Function.Name, content.ToolCall.Function.Arguments)) } - if content.Image != nil { - buf.WriteString("image: ") - if content.Image.URL != "" { - buf.WriteString(content.Image.URL) - } - if len(content.Image.Base64) > 50 { - buf.WriteString(content.Image.Base64[:50] + "...") - } else { - buf.WriteString(content.Image.Base64) - } - } } return buf.String() } @@ -95,22 +83,6 @@ func (in CompletionMessage) String() string { type ContentPart struct { Text string `json:"text,omitempty"` ToolCall *CompletionToolCall `json:"toolCall,omitempty"` - Image *ImageURL `json:"image,omitempty"` -} - -type ImageURLDetail string - -const ( - ImageURLDetailHigh ImageURLDetail = "high" - ImageURLDetailLow ImageURLDetail = "low" - ImageURLDetailAuto ImageURLDetail = "auto" -) - -type ImageURL struct { - Base64 string `json:"base64,omitempty"` - ContentType string `json:"contentType,omitempty"` - URL string `json:"url,omitempty"` - Detail ImageURLDetail `json:"detail,omitempty"` } type CompletionToolCall struct { diff --git a/pkg/types/tool.go b/pkg/types/tool.go index c71cff46..c6d76713 100644 --- a/pkg/types/tool.go +++ b/pkg/types/tool.go @@ -26,7 +26,6 @@ type BuiltinFunc func(ctx context.Context, env []string, input string) (string, type Parameters struct { Name string `json:"name,omitempty"` Description string `json:"description,omitempty"` - Vision bool `json:"vision,omitempty"` MaxTokens int `json:"maxTokens,omitempty"` ModelName string `json:"modelName,omitempty"` JSONResponse bool `json:"jsonResponse,omitempty"` @@ -59,9 +58,6 @@ func (t Tool) String() string { if len(t.Parameters.Tools) != 0 { _, _ = fmt.Fprintf(buf, "Tools: %s\n", strings.Join(t.Parameters.Tools, ", ")) } - if t.Parameters.Vision { - _, _ = fmt.Fprintln(buf, "Vision: true") - } if t.Parameters.MaxTokens != 0 { _, _ = fmt.Fprintf(buf, "Max Tokens: %d\n", t.Parameters.MaxTokens) } diff --git a/pkg/vision/image.go b/pkg/vision/image.go deleted file mode 100644 index 055178d4..00000000 --- a/pkg/vision/image.go +++ /dev/null @@ -1,99 +0,0 @@ -package vision - -import ( - "encoding/base64" - "encoding/json" - "fmt" - "os" - "strings" - - "github.com/gptscript-ai/gptscript/pkg/cache" - "github.com/gptscript-ai/gptscript/pkg/hash" - "github.com/gptscript-ai/gptscript/pkg/types" -) - -var ( - urlBase = os.Getenv("cached://") -) - -func ToVisionMessage(c *cache.Client, message types.CompletionMessage) (types.CompletionMessage, error) { - if len(message.Content) != 1 || !strings.HasPrefix(message.Content[0].Text, "{") { - return message, nil - } - - var ( - input inputMessage - content = message.Content[0] - ) - if err := json.Unmarshal([]byte(content.Text), &input); err != nil { - return message, nil - } - - content.Text = input.Text - - if input.URL != "" { - b64, ok, err := Base64FromStored(c, input.URL) - if err != nil { - return message, err - } - if b64 == "" || !ok { - content.Image = &types.ImageURL{ - URL: input.URL, - } - } else { - input.Base64 = b64 - } - } - - if input.Base64 != "" && input.ContentType != "" { - content.Image = &types.ImageURL{ - Base64: input.Base64, - ContentType: input.ContentType, - } - } - - message.Content = []types.ContentPart{ - content, - } - - return message, nil -} - -func Base64FromStored(cache *cache.Client, url string) (string, bool, error) { - if !strings.HasPrefix(url, urlBase) { - return "", false, nil - } - parts := strings.Split(url, "/") - if len(parts) < 2 { - return "", false, nil - } - name := parts[len(parts)-1] - - cached, ok, err := cache.Get(name) - if err != nil || !ok { - return "", ok, err - } - - return base64.StdEncoding.EncodeToString(cached), true, nil -} - -func ImageToURL(c *cache.Client, vision bool, message types.ImageURL) (string, error) { - if message.URL != "" { - return message.URL, nil - } - - if vision { - return fmt.Sprintf("data:%s;base64,%s", message.ContentType, message.Base64), nil - } - - data, err := base64.StdEncoding.DecodeString(message.Base64) - if err != nil { - return "", err - } - - id := "i" + hash.Encode(message)[:12] - if err := c.Store(id, data); err != nil { - return "", err - } - return fmt.Sprintf("%s/%s", urlBase, id), nil -} diff --git a/pkg/vision/schema.go b/pkg/vision/schema.go deleted file mode 100644 index 1fb41d9e..00000000 --- a/pkg/vision/schema.go +++ /dev/null @@ -1,39 +0,0 @@ -package vision - -import ( - "github.com/gptscript-ai/gptscript/pkg/types" -) - -var ( - Schema = types.JSONSchema{ - Property: types.Property{ - Type: "object", - }, - Properties: map[string]types.Property{ - "base64": { - Description: "The base64 encoded value of the image if an image URL is not specified", - Type: "string", - }, - "contentType": { - Description: `The content type of the image such as "image/jpeg" or "image/png"`, - Type: "string", - }, - "text": { - Description: "Instructions on how the passed image should be analyzed", - Type: "string", - }, - "url": { - Description: "The URL to the image to be processed. This should be set if base64 is not set", - Type: "string", - }, - }, - Defs: map[string]types.JSONSchema{}, - } -) - -type inputMessage struct { - Text string `json:"text,omitempty"` - Base64 string `json:"base64,omitempty"` - ContentType string `json:"contentType,omitempty"` - URL string `json:"url,omitempty"` -}