Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
a6c92be
add gcp anthropic cache support
alexagriffith Oct 20, 2025
5f16faf
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 20, 2025
7282087
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 21, 2025
027b1d3
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 21, 2025
5983e0a
anthropic specific struct
alexagriffith Oct 22, 2025
604b7f3
Merge branch 'alexagriffith/gcp-anthropic-cache' of https://github.co…
alexagriffith Oct 22, 2025
c460eba
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 22, 2025
7edeadd
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 24, 2025
e30dc5f
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 24, 2025
122fc7b
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 28, 2025
fb0251d
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 29, 2025
796f296
refactor cache to be on content level
alexagriffith Oct 29, 2025
657fef6
Merge branch 'alexagriffith/gcp-anthropic-cache' of https://github.co…
alexagriffith Oct 29, 2025
b888b43
add cache to assistant text
alexagriffith Oct 29, 2025
4120e1e
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 30, 2025
22493a2
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Oct 31, 2025
a2aca2d
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Nov 3, 2025
b37c878
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Nov 5, 2025
21e47d3
updating token count
alexagriffith Nov 6, 2025
dac7f78
remove print
alexagriffith Nov 7, 2025
79c25cb
fix test
alexagriffith Nov 7, 2025
c9d2962
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Nov 7, 2025
fc3e6d9
fix upstream test
alexagriffith Nov 7, 2025
f06bad9
Merge branch 'alexagriffith/gcp-anthropic-cache' of https://github.co…
alexagriffith Nov 7, 2025
17e4291
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Nov 7, 2025
3333bbd
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Nov 10, 2025
1eae23c
add gcp anthropic cache support
alexagriffith Oct 20, 2025
85ce975
anthropic specific struct
alexagriffith Oct 22, 2025
3edf6e1
refactor cache to be on content level
alexagriffith Oct 29, 2025
ae763dd
add cache to assistant text
alexagriffith Oct 29, 2025
68fbfaa
updating token count
alexagriffith Nov 6, 2025
d14cbe6
remove print
alexagriffith Nov 7, 2025
a79d486
fix test
alexagriffith Nov 7, 2025
34637f9
fix upstream test
alexagriffith Nov 7, 2025
d5d4475
update merge
alexagriffith Nov 14, 2025
5ef068d
Merge branch 'alexagriffith/gcp-anthropic-cache' of https://github.co…
alexagriffith Nov 14, 2025
1719a93
remove exp host
alexagriffith Nov 14, 2025
00f1247
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith Nov 14, 2025
2b6d70d
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
yuzisun Nov 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions internal/apischema/openai/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ type ChatCompletionContentPartTextParam struct {
// The text content.
Text string `json:"text"`
// The type of the content part.
Type string `json:"type"`
Type string `json:"type"`
*AnthropicContentFields `json:",inline,omitempty"`
}

type ChatCompletionContentPartRefusalParam struct {
Expand All @@ -106,7 +107,8 @@ type ChatCompletionContentPartRefusalParam struct {
type ChatCompletionContentPartInputAudioParam struct {
InputAudio ChatCompletionContentPartInputAudioInputAudioParam `json:"input_audio"`
// The type of the content part. Always `input_audio`.
Type ChatCompletionContentPartInputAudioType `json:"type"`
Type ChatCompletionContentPartInputAudioType `json:"type"`
*AnthropicContentFields `json:",inline,omitempty"`
}

// ChatCompletionContentPartInputAudioInputAudioFormat The format of the encoded audio data. Currently supports "wav" and "mp3".
Expand Down Expand Up @@ -144,7 +146,8 @@ type ChatCompletionContentPartImageImageURLParam struct {
type ChatCompletionContentPartImageParam struct {
ImageURL ChatCompletionContentPartImageImageURLParam `json:"image_url"`
// The type of the content part.
Type ChatCompletionContentPartImageType `json:"type"`
Type ChatCompletionContentPartImageType `json:"type"`
*AnthropicContentFields `json:",inline,omitempty"`
}

type ChatCompletionContentPartFileFileParam struct {
Expand All @@ -163,7 +166,8 @@ type ChatCompletionContentPartFileParam struct {
// The type of the content part. Always `file`.
//
// This field can be elided, and will marshal its zero value as "file".
Type ChatCompletionContentPartFileType `json:"type"`
Type ChatCompletionContentPartFileType `json:"type"`
*AnthropicContentFields `json:",inline,omitempty"`
}

// ChatCompletionContentPartUserUnionParam Learn about
Expand Down Expand Up @@ -348,6 +352,11 @@ func (s StringOrUserRoleContentUnion) MarshalJSON() ([]byte, error) {
return json.Marshal(s.Value)
}

// AnthropicContentFields contains Anthropic model-specific fields that can be added to messages.
// It is embedded by pointer in the text, input-audio, image and file content parts, in the
// assistant message content, in tool calls and in function definitions, so the fields can be
// attached to each of those items individually. A nil pointer means no Anthropic fields were set.
type AnthropicContentFields struct {
// CacheControl is the Anthropic cache control setting for the enclosing item.
// It is serialized under the "cache_control" JSON key with the omitzero option.
CacheControl anthropic.CacheControlEphemeralParam `json:"cache_control,omitzero"`
}

// Function message is deprecated and we do not allow it.
type ChatCompletionMessageParamUnion struct {
OfDeveloper *ChatCompletionDeveloperMessageParam `json:",omitzero,inline"`
Expand Down Expand Up @@ -504,8 +513,9 @@ type ChatCompletionAssistantMessageParamContent struct {
Text *string `json:"text,omitempty"`

// The signature for a thinking block.
Signature *string `json:"signature,omitempty"`
RedactedContent []byte `json:"redactedContent,omitempty"`
Signature *string `json:"signature,omitempty"`
RedactedContent []byte `json:"redactedContent,omitempty"`
*AnthropicContentFields `json:",inline,omitempty"`
}

// ChatCompletionAssistantMessageParam Messages sent by the model in response to user messages.
Expand Down Expand Up @@ -551,7 +561,8 @@ type ChatCompletionMessageToolCallParam struct {
// The function that the model called.
Function ChatCompletionMessageToolCallFunctionParam `json:"function"`
// The type of the tool. Currently, only `function` is supported.
Type ChatCompletionMessageToolCallType `json:"type,omitempty"`
Type ChatCompletionMessageToolCallType `json:"type,omitempty"`
*AnthropicContentFields `json:",inline,omitempty"`
}

// extractMessageRole extracts role from OpenAI message union types.
Expand Down Expand Up @@ -1070,7 +1081,8 @@ type FunctionDefinition struct {
// or you can pass in a struct which serializes to the proper JSON schema.
// The jsonschema package is provided for convenience, but you should
// consider another specialized library if you require more complex schemas.
Parameters any `json:"parameters"`
Parameters any `json:"parameters"`
*AnthropicContentFields `json:",inline,omitempty"`
}

// Deprecated: use FunctionDefinition instead.
Expand Down
179 changes: 125 additions & 54 deletions internal/translator/openai_gcpanthropic.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ func translateOpenAItoAnthropicTools(openAITools []openai.Tool, openAIToolChoice
Description: anthropic.String(openAITool.Function.Description),
}

if isCacheEnabled(openAITool.Function.AnthropicContentFields) {
toolParam.CacheControl = anthropic.NewCacheControlEphemeralParam()
}

// The parameters for the function are expected to be a JSON Schema object.
// We can pass them through as-is.
if openAITool.Function.Parameters != nil {
Expand Down Expand Up @@ -223,7 +227,12 @@ func translateOpenAItoAnthropicTools(openAITools []openai.Tool, openAIToolChoice

// convertImageContentToAnthropic translates an OpenAI image URL into the corresponding Anthropic content block.
// It handles data URIs for various image types and PDFs, as well as remote URLs.
func convertImageContentToAnthropic(imageURL string) (anthropic.ContentBlockParamUnion, error) {
func convertImageContentToAnthropic(imageURL string, fields *openai.AnthropicContentFields) (anthropic.ContentBlockParamUnion, error) {
var cacheControlParam anthropic.CacheControlEphemeralParam
if isCacheEnabled(fields) {
cacheControlParam = fields.CacheControl
}

switch {
case strings.HasPrefix(imageURL, "data:"):
contentType, data, err := parseDataURI(imageURL)
Expand All @@ -233,30 +242,46 @@ func convertImageContentToAnthropic(imageURL string) (anthropic.ContentBlockPara
base64Data := base64.StdEncoding.EncodeToString(data)
if contentType == string(constant.ValueOf[constant.ApplicationPDF]()) {
pdfSource := anthropic.Base64PDFSourceParam{Data: base64Data}
return anthropic.NewDocumentBlock(pdfSource), nil
docBlock := anthropic.NewDocumentBlock(pdfSource)
docBlock.OfDocument.CacheControl = cacheControlParam
return docBlock, nil
}
if isAnthropicSupportedImageMediaType(contentType) {
return anthropic.NewImageBlockBase64(contentType, base64Data), nil
imgBlock := anthropic.NewImageBlockBase64(contentType, base64Data)
imgBlock.OfImage.CacheControl = cacheControlParam
return imgBlock, nil
}
return anthropic.ContentBlockParamUnion{}, fmt.Errorf("invalid media_type for image '%s'", contentType)
case strings.HasSuffix(strings.ToLower(imageURL), ".pdf"):
return anthropic.NewDocumentBlock(anthropic.URLPDFSourceParam{URL: imageURL}), nil
docBlock := anthropic.NewDocumentBlock(anthropic.URLPDFSourceParam{URL: imageURL})
docBlock.OfDocument.CacheControl = cacheControlParam
return docBlock, nil
default:
return anthropic.NewImageBlock(anthropic.URLImageSourceParam{URL: imageURL}), nil
imgBlock := anthropic.NewImageBlock(anthropic.URLImageSourceParam{URL: imageURL})
imgBlock.OfImage.CacheControl = cacheControlParam
return imgBlock, nil
}
}

// isCacheEnabled reports whether the given Anthropic fields request caching.
// It is true only when fields is non-nil and its cache control type equals
// the ephemeral constant; a nil fields pointer is treated as "not enabled".
func isCacheEnabled(fields *openai.AnthropicContentFields) bool {
	if fields == nil {
		return false
	}
	ephemeral := constant.ValueOf[constant.Ephemeral]()
	return fields.CacheControl.Type == ephemeral
}

// convertContentPartsToAnthropic iterates over a slice of OpenAI content parts
// and converts each into an Anthropic content block.
func convertContentPartsToAnthropic(parts []openai.ChatCompletionContentPartUserUnionParam) ([]anthropic.ContentBlockParamUnion, error) {
resultContent := make([]anthropic.ContentBlockParamUnion, 0, len(parts))
for _, contentPart := range parts {
switch {
case contentPart.OfText != nil:
resultContent = append(resultContent, anthropic.NewTextBlock(contentPart.OfText.Text))
textBlock := anthropic.NewTextBlock(contentPart.OfText.Text)
if isCacheEnabled(contentPart.OfText.AnthropicContentFields) {
textBlock.OfText.CacheControl = contentPart.OfText.CacheControl
}
resultContent = append(resultContent, textBlock)

case contentPart.OfImageURL != nil:
block, err := convertImageContentToAnthropic(contentPart.OfImageURL.ImageURL.URL)
block, err := convertImageContentToAnthropic(contentPart.OfImageURL.ImageURL.URL, contentPart.OfImageURL.AnthropicContentFields)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -295,36 +320,45 @@ func openAIToAnthropicContent(content any) ([]anthropic.ContentBlockParamUnion,
anthropic.NewTextBlock(val),
}, nil
case []openai.ChatCompletionContentPartTextParam:
// Convert text params to string and create text block
var sb strings.Builder
var contentBlocks []anthropic.ContentBlockParamUnion
for _, part := range val {
sb.WriteString(part.Text)
textBlock := anthropic.NewTextBlock(part.Text)
// In an array of text parts, each can have its own cache setting.
if isCacheEnabled(part.AnthropicContentFields) {
textBlock.OfText.CacheControl = part.CacheControl
}
contentBlocks = append(contentBlocks, textBlock)
}
return []anthropic.ContentBlockParamUnion{
anthropic.NewTextBlock(sb.String()),
}, nil
return contentBlocks, nil
default:
return nil, fmt.Errorf("unsupported ContentUnion value type: %T", val)
}
}
return nil, fmt.Errorf("unsupported OpenAI content type: %T", content)
}

func extractSystemPromptFromDeveloperMsg(msg openai.ChatCompletionDeveloperMessageParam) string {
// extractSystemPromptFromDeveloperMsg flattens content and checks for cache flags.
// It returns the combined string and a pointer to the cache control of the last part that has caching enabled (nil if no part does).
func extractSystemPromptFromDeveloperMsg(msg openai.ChatCompletionDeveloperMessageParam) (msgValue string, cacheParam *anthropic.CacheControlEphemeralParam) {
switch v := msg.Content.Value.(type) {
case nil:
return ""
return
case string:
return v
msgValue = v
return
case []openai.ChatCompletionContentPartTextParam:
// Concatenate all text parts for completeness.
// Concatenate all text parts and check for caching.
var sb strings.Builder
for _, part := range v {
sb.WriteString(part.Text)
if isCacheEnabled(part.AnthropicContentFields) {
cacheParam = &part.CacheControl
}
}
return sb.String()
msgValue = sb.String()
return
default:
return ""
return
}
}

Expand Down Expand Up @@ -353,7 +387,11 @@ func openAIMessageToAnthropicMessageRoleAssistant(openAiMessage *openai.ChatComp
}
case openai.ChatCompletionAssistantMessageParamContentTypeText:
if content.Text != nil {
contentBlocks = append(contentBlocks, anthropic.NewTextBlock(*content.Text))
textBlock := anthropic.NewTextBlock(*content.Text)
if isCacheEnabled(content.AnthropicContentFields) {
textBlock.OfText.CacheControl = content.CacheControl
}
contentBlocks = append(contentBlocks, textBlock)
}
default:
err = fmt.Errorf("content type not supported: %v", content.Type)
Expand All @@ -375,6 +413,11 @@ func openAIMessageToAnthropicMessageRoleAssistant(openAiMessage *openai.ChatComp
Name: toolCall.Function.Name,
Input: input,
}

if isCacheEnabled(toolCall.AnthropicContentFields) {
toolUse.CacheControl = toolCall.CacheControl
}

contentBlocks = append(contentBlocks, anthropic.ContentBlockParamUnion{OfToolUse: &toolUse})
}

Expand All @@ -391,10 +434,20 @@ func openAIToAnthropicMessages(openAIMsgs []openai.ChatCompletionMessageParamUni
switch {
case msg.OfSystem != nil:
devParam := systemMsgToDeveloperMsg(*msg.OfSystem)
systemBlocks = append(systemBlocks, anthropic.TextBlockParam{Text: extractSystemPromptFromDeveloperMsg(devParam)})
systemText, cacheControl := extractSystemPromptFromDeveloperMsg(devParam)
systemBlock := anthropic.TextBlockParam{Text: systemText}
if cacheControl != nil {
systemBlock.CacheControl = *cacheControl
}
systemBlocks = append(systemBlocks, systemBlock)
i++
case msg.OfDeveloper != nil:
systemBlocks = append(systemBlocks, anthropic.TextBlockParam{Text: extractSystemPromptFromDeveloperMsg(*msg.OfDeveloper)})
systemText, cacheControl := extractSystemPromptFromDeveloperMsg(*msg.OfDeveloper)
systemBlock := anthropic.TextBlockParam{Text: systemText}
if cacheControl != nil {
systemBlock.CacheControl = *cacheControl
}
systemBlocks = append(systemBlocks, systemBlock)
i++
case msg.OfUser != nil:
message := *msg.OfUser
Expand Down Expand Up @@ -425,18 +478,29 @@ func openAIToAnthropicMessages(openAIMsgs []openai.ChatCompletionMessageParamUni
for i < len(openAIMsgs) && openAIMsgs[i].ExtractMessgaeRole() == openai.ChatMessageRoleTool {
currentMsg := &openAIMsgs[i]
toolMsg := currentMsg.OfTool

var contentBlocks []anthropic.ContentBlockParamUnion
contentBlocks, err = openAIToAnthropicContent(toolMsg.Content)
if err != nil {
return
}

var toolContent []anthropic.ToolResultBlockParamContentUnion
var cacheControl *anthropic.CacheControlEphemeralParam

for _, c := range contentBlocks {
var trb anthropic.ToolResultBlockParamContentUnion
if c.OfText != nil {
// Carry over the cache control of the translated part; when there are several parts, the last one wins.
switch {
case c.OfText != nil:
trb.OfText = c.OfText
} else if c.OfImage != nil {
cacheControl = &c.OfText.CacheControl
case c.OfImage != nil:
trb.OfImage = c.OfImage
cacheControl = &c.OfImage.CacheControl
case c.OfDocument != nil:
trb.OfDocument = c.OfDocument
cacheControl = &c.OfDocument.CacheControl
}
toolContent = append(toolContent, trb)
}
Expand All @@ -457,7 +521,13 @@ func openAIToAnthropicMessages(openAIMsgs []openai.ChatCompletionMessageParamUni
Content: toolContent,
IsError: anthropic.Bool(isError),
}
toolResultBlocks = append(toolResultBlocks, anthropic.ContentBlockParamUnion{OfToolResult: &toolResultBlock})

if cacheControl != nil {
toolResultBlock.CacheControl = *cacheControl
}

toolResultBlockUnion := anthropic.ContentBlockParamUnion{OfToolResult: &toolResultBlock}
toolResultBlocks = append(toolResultBlocks, toolResultBlockUnion)
i++
}
// Append all aggregated tool results.
Expand Down Expand Up @@ -533,6 +603,28 @@ func buildAnthropicParams(openAIReq *openai.ChatCompletionRequest) (params *anth
return params, nil
}

// anthropicToolUseToOpenAICalls maps one Anthropic content block onto OpenAI tool calls.
//
// A block whose type is not tool_use yields a nil slice and a nil error. A tool_use block
// yields exactly one function-typed tool call whose ID points at the block's ID, whose name
// is the block's name, and whose arguments are the JSON encoding of the block's input.
// An error is returned only when that input cannot be marshaled.
func anthropicToolUseToOpenAICalls(block *anthropic.ContentBlockUnion) ([]openai.ChatCompletionMessageToolCallParam, error) {
	// Anything other than a tool_use block contributes no tool calls.
	if block.Type != string(constant.ValueOf[constant.ToolUse]()) {
		return nil, nil
	}

	encodedArgs, err := json.Marshal(block.Input)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal tool_use input: %w", err)
	}

	call := openai.ChatCompletionMessageToolCallParam{
		ID:   &block.ID,
		Type: openai.ChatCompletionMessageToolCallTypeFunction,
		Function: openai.ChatCompletionMessageToolCallFunctionParam{
			Name:      block.Name,
			Arguments: string(encodedArgs),
		},
	}
	return []openai.ChatCompletionMessageToolCallParam{call}, nil
}

// RequestBody implements [OpenAIChatCompletionTranslator.RequestBody] for GCP.
func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) RequestBody(_ []byte, openAIReq *openai.ChatCompletionRequest, _ bool) (
newHeaders []internalapi.Header, newBody []byte, err error,
Expand Down Expand Up @@ -633,28 +725,6 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseError(respHeade
return
}

// anthropicToolUseToOpenAICalls converts Anthropic tool_use content blocks to OpenAI tool calls.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just moved it

func anthropicToolUseToOpenAICalls(block *anthropic.ContentBlockUnion) ([]openai.ChatCompletionMessageToolCallParam, error) {
	// Only tool_use blocks are converted; every other block type yields no calls and no error.
	isToolUse := block.Type == string(constant.ValueOf[constant.ToolUse]())
	if !isToolUse {
		return nil, nil
	}

	// The tool call carries its arguments as a string holding the JSON encoding of the input.
	rawArgs, marshalErr := json.Marshal(block.Input)
	if marshalErr != nil {
		return nil, fmt.Errorf("failed to marshal tool_use input: %w", marshalErr)
	}

	fn := openai.ChatCompletionMessageToolCallFunctionParam{
		Name:      block.Name,
		Arguments: string(rawArgs),
	}
	return []openai.ChatCompletionMessageToolCallParam{{
		ID:       &block.ID,
		Type:     openai.ChatCompletionMessageToolCallTypeFunction,
		Function: fn,
	}}, nil
}

// ResponseHeaders implements [OpenAIChatCompletionTranslator.ResponseHeaders].
func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseHeaders(_ map[string]string) (
newHeaders []internalapi.Header, err error,
Expand Down Expand Up @@ -692,18 +762,19 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseBody(_ map[stri
Object: string(openAIconstant.ValueOf[openAIconstant.ChatCompletion]()),
Choices: make([]openai.ChatCompletionResponseChoice, 0),
}
promptTokens := anthropicResp.Usage.InputTokens + anthropicResp.Usage.CacheReadInputTokens + anthropicResp.Usage.CacheCreationInputTokens
tokenUsage = LLMTokenUsage{
InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec
OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec
TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec
CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec
InputTokens: uint32(promptTokens), //nolint:gosec
OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec
TotalTokens: uint32(promptTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec
CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens + anthropicResp.Usage.CacheCreationInputTokens), //nolint:gosec
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that in the OpenAI token usage, the cached-token count covers only the cache read input tokens.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

discussed offline - should be there

}
openAIResp.Usage = openai.Usage{
CompletionTokens: int(anthropicResp.Usage.OutputTokens),
PromptTokens: int(anthropicResp.Usage.InputTokens),
TotalTokens: int(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens),
PromptTokens: int(promptTokens),
TotalTokens: int(promptTokens + anthropicResp.Usage.OutputTokens),
PromptTokensDetails: &openai.PromptTokensDetails{
CachedTokens: int(anthropicResp.Usage.CacheReadInputTokens),
CachedTokens: int(anthropicResp.Usage.CacheReadInputTokens + anthropicResp.Usage.CacheCreationInputTokens),
},
}

Expand Down
Loading