-
Notifications
You must be signed in to change notification settings - Fork 127
feat: gcp anthropic cache support #1396
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
yuzisun
merged 39 commits into
envoyproxy:main
from
alexagriffith:alexagriffith/gcp-anthropic-cache
Nov 16, 2025
Merged
Changes from all commits
Commits
Show all changes
39 commits
Select commit
Hold shift + click to select a range
a6c92be
add gcp anthropic cache support
alexagriffith 5f16faf
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 7282087
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 027b1d3
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 5983e0a
anthropic specific struct
alexagriffith 604b7f3
Merge branch 'alexagriffith/gcp-anthropic-cache' of https://github.co…
alexagriffith c460eba
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 7edeadd
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith e30dc5f
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 122fc7b
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith fb0251d
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 796f296
refactor cache to be on content level
alexagriffith 657fef6
Merge branch 'alexagriffith/gcp-anthropic-cache' of https://github.co…
alexagriffith b888b43
add cache to assistant text
alexagriffith 4120e1e
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 22493a2
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith a2aca2d
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith b37c878
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 21e47d3
updating token count
alexagriffith dac7f78
remove print
alexagriffith 79c25cb
fix test
alexagriffith c9d2962
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith fc3e6d9
fix upstream test
alexagriffith f06bad9
Merge branch 'alexagriffith/gcp-anthropic-cache' of https://github.co…
alexagriffith 17e4291
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 3333bbd
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 1eae23c
add gcp anthropic cache support
alexagriffith 85ce975
anthropic specific struct
alexagriffith 3edf6e1
refactor cache to be on content level
alexagriffith ae763dd
add cache to assistant text
alexagriffith 68fbfaa
updating token count
alexagriffith d14cbe6
remove print
alexagriffith a79d486
fix test
alexagriffith 34637f9
fix upstream test
alexagriffith d5d4475
update merge
alexagriffith 5ef068d
Merge branch 'alexagriffith/gcp-anthropic-cache' of https://github.co…
alexagriffith 1719a93
remove exp host
alexagriffith 00f1247
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
alexagriffith 2b6d70d
Merge branch 'main' into alexagriffith/gcp-anthropic-cache
yuzisun File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -151,6 +151,10 @@ func translateOpenAItoAnthropicTools(openAITools []openai.Tool, openAIToolChoice | |
| Description: anthropic.String(openAITool.Function.Description), | ||
| } | ||
|
|
||
| if isCacheEnabled(openAITool.Function.AnthropicContentFields) { | ||
| toolParam.CacheControl = anthropic.NewCacheControlEphemeralParam() | ||
| } | ||
|
|
||
| // The parameters for the function are expected to be a JSON Schema object. | ||
| // We can pass them through as-is. | ||
| if openAITool.Function.Parameters != nil { | ||
|
|
@@ -223,7 +227,12 @@ func translateOpenAItoAnthropicTools(openAITools []openai.Tool, openAIToolChoice | |
|
|
||
| // convertImageContentToAnthropic translates an OpenAI image URL into the corresponding Anthropic content block. | ||
| // It handles data URIs for various image types and PDFs, as well as remote URLs. | ||
| func convertImageContentToAnthropic(imageURL string) (anthropic.ContentBlockParamUnion, error) { | ||
| func convertImageContentToAnthropic(imageURL string, fields *openai.AnthropicContentFields) (anthropic.ContentBlockParamUnion, error) { | ||
| var cacheControlParam anthropic.CacheControlEphemeralParam | ||
| if isCacheEnabled(fields) { | ||
| cacheControlParam = fields.CacheControl | ||
| } | ||
|
|
||
| switch { | ||
| case strings.HasPrefix(imageURL, "data:"): | ||
| contentType, data, err := parseDataURI(imageURL) | ||
|
|
@@ -233,30 +242,46 @@ func convertImageContentToAnthropic(imageURL string) (anthropic.ContentBlockPara | |
| base64Data := base64.StdEncoding.EncodeToString(data) | ||
| if contentType == string(constant.ValueOf[constant.ApplicationPDF]()) { | ||
| pdfSource := anthropic.Base64PDFSourceParam{Data: base64Data} | ||
| return anthropic.NewDocumentBlock(pdfSource), nil | ||
| docBlock := anthropic.NewDocumentBlock(pdfSource) | ||
| docBlock.OfDocument.CacheControl = cacheControlParam | ||
| return docBlock, nil | ||
| } | ||
| if isAnthropicSupportedImageMediaType(contentType) { | ||
| return anthropic.NewImageBlockBase64(contentType, base64Data), nil | ||
| imgBlock := anthropic.NewImageBlockBase64(contentType, base64Data) | ||
| imgBlock.OfImage.CacheControl = cacheControlParam | ||
| return imgBlock, nil | ||
| } | ||
| return anthropic.ContentBlockParamUnion{}, fmt.Errorf("invalid media_type for image '%s'", contentType) | ||
| case strings.HasSuffix(strings.ToLower(imageURL), ".pdf"): | ||
| return anthropic.NewDocumentBlock(anthropic.URLPDFSourceParam{URL: imageURL}), nil | ||
| docBlock := anthropic.NewDocumentBlock(anthropic.URLPDFSourceParam{URL: imageURL}) | ||
| docBlock.OfDocument.CacheControl = cacheControlParam | ||
| return docBlock, nil | ||
| default: | ||
| return anthropic.NewImageBlock(anthropic.URLImageSourceParam{URL: imageURL}), nil | ||
| imgBlock := anthropic.NewImageBlock(anthropic.URLImageSourceParam{URL: imageURL}) | ||
| imgBlock.OfImage.CacheControl = cacheControlParam | ||
| return imgBlock, nil | ||
| } | ||
| } | ||
|
|
||
| func isCacheEnabled(fields *openai.AnthropicContentFields) bool { | ||
| return fields != nil && fields.CacheControl.Type == constant.ValueOf[constant.Ephemeral]() | ||
| } | ||
|
|
||
| // convertContentPartsToAnthropic iterates over a slice of OpenAI content parts | ||
| // and converts each into an Anthropic content block. | ||
| func convertContentPartsToAnthropic(parts []openai.ChatCompletionContentPartUserUnionParam) ([]anthropic.ContentBlockParamUnion, error) { | ||
| resultContent := make([]anthropic.ContentBlockParamUnion, 0, len(parts)) | ||
| for _, contentPart := range parts { | ||
| switch { | ||
| case contentPart.OfText != nil: | ||
| resultContent = append(resultContent, anthropic.NewTextBlock(contentPart.OfText.Text)) | ||
| textBlock := anthropic.NewTextBlock(contentPart.OfText.Text) | ||
| if isCacheEnabled(contentPart.OfText.AnthropicContentFields) { | ||
| textBlock.OfText.CacheControl = contentPart.OfText.CacheControl | ||
| } | ||
| resultContent = append(resultContent, textBlock) | ||
|
|
||
| case contentPart.OfImageURL != nil: | ||
| block, err := convertImageContentToAnthropic(contentPart.OfImageURL.ImageURL.URL) | ||
| block, err := convertImageContentToAnthropic(contentPart.OfImageURL.ImageURL.URL, contentPart.OfImageURL.AnthropicContentFields) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
@@ -295,36 +320,45 @@ func openAIToAnthropicContent(content any) ([]anthropic.ContentBlockParamUnion, | |
| anthropic.NewTextBlock(val), | ||
| }, nil | ||
| case []openai.ChatCompletionContentPartTextParam: | ||
| // Convert text params to string and create text block | ||
| var sb strings.Builder | ||
| var contentBlocks []anthropic.ContentBlockParamUnion | ||
| for _, part := range val { | ||
| sb.WriteString(part.Text) | ||
| textBlock := anthropic.NewTextBlock(part.Text) | ||
| // In an array of text parts, each can have its own cache setting. | ||
| if isCacheEnabled(part.AnthropicContentFields) { | ||
| textBlock.OfText.CacheControl = part.CacheControl | ||
| } | ||
| contentBlocks = append(contentBlocks, textBlock) | ||
| } | ||
| return []anthropic.ContentBlockParamUnion{ | ||
| anthropic.NewTextBlock(sb.String()), | ||
| }, nil | ||
| return contentBlocks, nil | ||
| default: | ||
| return nil, fmt.Errorf("unsupported ContentUnion value type: %T", val) | ||
| } | ||
| } | ||
| return nil, fmt.Errorf("unsupported OpenAI content type: %T", content) | ||
| } | ||
|
|
||
| func extractSystemPromptFromDeveloperMsg(msg openai.ChatCompletionDeveloperMessageParam) string { | ||
| // extractSystemPromptFromDeveloperMsg flattens content and checks for cache flags. | ||
| // It returns the combined string and a boolean indicating if any part was cacheable. | ||
| func extractSystemPromptFromDeveloperMsg(msg openai.ChatCompletionDeveloperMessageParam) (msgValue string, cacheParam *anthropic.CacheControlEphemeralParam) { | ||
| switch v := msg.Content.Value.(type) { | ||
| case nil: | ||
| return "" | ||
| return | ||
| case string: | ||
| return v | ||
| msgValue = v | ||
| return | ||
| case []openai.ChatCompletionContentPartTextParam: | ||
| // Concatenate all text parts for completeness. | ||
| // Concatenate all text parts and check for caching. | ||
| var sb strings.Builder | ||
| for _, part := range v { | ||
| sb.WriteString(part.Text) | ||
| if isCacheEnabled(part.AnthropicContentFields) { | ||
| cacheParam = &part.CacheControl | ||
| } | ||
| } | ||
| return sb.String() | ||
| msgValue = sb.String() | ||
| return | ||
| default: | ||
| return "" | ||
| return | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -353,7 +387,11 @@ func openAIMessageToAnthropicMessageRoleAssistant(openAiMessage *openai.ChatComp | |
| } | ||
| case openai.ChatCompletionAssistantMessageParamContentTypeText: | ||
| if content.Text != nil { | ||
| contentBlocks = append(contentBlocks, anthropic.NewTextBlock(*content.Text)) | ||
| textBlock := anthropic.NewTextBlock(*content.Text) | ||
| if isCacheEnabled(content.AnthropicContentFields) { | ||
| textBlock.OfText.CacheControl = content.CacheControl | ||
| } | ||
| contentBlocks = append(contentBlocks, textBlock) | ||
| } | ||
| default: | ||
| err = fmt.Errorf("content type not supported: %v", content.Type) | ||
|
|
@@ -375,6 +413,11 @@ func openAIMessageToAnthropicMessageRoleAssistant(openAiMessage *openai.ChatComp | |
| Name: toolCall.Function.Name, | ||
| Input: input, | ||
| } | ||
|
|
||
| if isCacheEnabled(toolCall.AnthropicContentFields) { | ||
| toolUse.CacheControl = toolCall.CacheControl | ||
| } | ||
|
|
||
| contentBlocks = append(contentBlocks, anthropic.ContentBlockParamUnion{OfToolUse: &toolUse}) | ||
| } | ||
|
|
||
|
|
@@ -391,10 +434,20 @@ func openAIToAnthropicMessages(openAIMsgs []openai.ChatCompletionMessageParamUni | |
| switch { | ||
| case msg.OfSystem != nil: | ||
| devParam := systemMsgToDeveloperMsg(*msg.OfSystem) | ||
| systemBlocks = append(systemBlocks, anthropic.TextBlockParam{Text: extractSystemPromptFromDeveloperMsg(devParam)}) | ||
| systemText, cacheControl := extractSystemPromptFromDeveloperMsg(devParam) | ||
| systemBlock := anthropic.TextBlockParam{Text: systemText} | ||
| if cacheControl != nil { | ||
| systemBlock.CacheControl = *cacheControl | ||
| } | ||
| systemBlocks = append(systemBlocks, systemBlock) | ||
| i++ | ||
| case msg.OfDeveloper != nil: | ||
| systemBlocks = append(systemBlocks, anthropic.TextBlockParam{Text: extractSystemPromptFromDeveloperMsg(*msg.OfDeveloper)}) | ||
| systemText, cacheControl := extractSystemPromptFromDeveloperMsg(*msg.OfDeveloper) | ||
| systemBlock := anthropic.TextBlockParam{Text: systemText} | ||
| if cacheControl != nil { | ||
| systemBlock.CacheControl = *cacheControl | ||
| } | ||
| systemBlocks = append(systemBlocks, systemBlock) | ||
| i++ | ||
| case msg.OfUser != nil: | ||
| message := *msg.OfUser | ||
|
|
@@ -425,18 +478,29 @@ func openAIToAnthropicMessages(openAIMsgs []openai.ChatCompletionMessageParamUni | |
| for i < len(openAIMsgs) && openAIMsgs[i].ExtractMessgaeRole() == openai.ChatMessageRoleTool { | ||
| currentMsg := &openAIMsgs[i] | ||
| toolMsg := currentMsg.OfTool | ||
|
|
||
| var contentBlocks []anthropic.ContentBlockParamUnion | ||
| contentBlocks, err = openAIToAnthropicContent(toolMsg.Content) | ||
| if err != nil { | ||
| return | ||
| } | ||
|
|
||
| var toolContent []anthropic.ToolResultBlockParamContentUnion | ||
| var cacheControl *anthropic.CacheControlEphemeralParam | ||
|
|
||
| for _, c := range contentBlocks { | ||
| var trb anthropic.ToolResultBlockParamContentUnion | ||
| if c.OfText != nil { | ||
| // Check if the translated part has caching enabled. | ||
| switch { | ||
| case c.OfText != nil: | ||
| trb.OfText = c.OfText | ||
| } else if c.OfImage != nil { | ||
| cacheControl = &c.OfText.CacheControl | ||
| case c.OfImage != nil: | ||
| trb.OfImage = c.OfImage | ||
| cacheControl = &c.OfImage.CacheControl | ||
| case c.OfDocument != nil: | ||
| trb.OfDocument = c.OfDocument | ||
| cacheControl = &c.OfDocument.CacheControl | ||
| } | ||
| toolContent = append(toolContent, trb) | ||
| } | ||
|
|
@@ -457,7 +521,13 @@ func openAIToAnthropicMessages(openAIMsgs []openai.ChatCompletionMessageParamUni | |
| Content: toolContent, | ||
| IsError: anthropic.Bool(isError), | ||
| } | ||
| toolResultBlocks = append(toolResultBlocks, anthropic.ContentBlockParamUnion{OfToolResult: &toolResultBlock}) | ||
|
|
||
| if cacheControl != nil { | ||
| toolResultBlock.CacheControl = *cacheControl | ||
| } | ||
|
|
||
| toolResultBlockUnion := anthropic.ContentBlockParamUnion{OfToolResult: &toolResultBlock} | ||
| toolResultBlocks = append(toolResultBlocks, toolResultBlockUnion) | ||
| i++ | ||
| } | ||
| // Append all aggregated tool results. | ||
|
|
@@ -533,6 +603,28 @@ func buildAnthropicParams(openAIReq *openai.ChatCompletionRequest) (params *anth | |
| return params, nil | ||
| } | ||
|
|
||
| // anthropicToolUseToOpenAICalls converts Anthropic tool_use content blocks to OpenAI tool calls. | ||
| func anthropicToolUseToOpenAICalls(block *anthropic.ContentBlockUnion) ([]openai.ChatCompletionMessageToolCallParam, error) { | ||
| var toolCalls []openai.ChatCompletionMessageToolCallParam | ||
| if block.Type != string(constant.ValueOf[constant.ToolUse]()) { | ||
| return toolCalls, nil | ||
| } | ||
| argsBytes, err := json.Marshal(block.Input) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to marshal tool_use input: %w", err) | ||
| } | ||
| toolCalls = append(toolCalls, openai.ChatCompletionMessageToolCallParam{ | ||
| ID: &block.ID, | ||
| Type: openai.ChatCompletionMessageToolCallTypeFunction, | ||
| Function: openai.ChatCompletionMessageToolCallFunctionParam{ | ||
| Name: block.Name, | ||
| Arguments: string(argsBytes), | ||
| }, | ||
| }) | ||
|
|
||
| return toolCalls, nil | ||
| } | ||
|
|
||
| // RequestBody implements [OpenAIChatCompletionTranslator.RequestBody] for GCP. | ||
| func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) RequestBody(_ []byte, openAIReq *openai.ChatCompletionRequest, _ bool) ( | ||
| newHeaders []internalapi.Header, newBody []byte, err error, | ||
|
|
@@ -633,28 +725,6 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseError(respHeade | |
| return | ||
| } | ||
|
|
||
| // anthropicToolUseToOpenAICalls converts Anthropic tool_use content blocks to OpenAI tool calls. | ||
| func anthropicToolUseToOpenAICalls(block *anthropic.ContentBlockUnion) ([]openai.ChatCompletionMessageToolCallParam, error) { | ||
| var toolCalls []openai.ChatCompletionMessageToolCallParam | ||
| if block.Type != string(constant.ValueOf[constant.ToolUse]()) { | ||
| return toolCalls, nil | ||
| } | ||
| argsBytes, err := json.Marshal(block.Input) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("failed to marshal tool_use input: %w", err) | ||
| } | ||
| toolCalls = append(toolCalls, openai.ChatCompletionMessageToolCallParam{ | ||
| ID: &block.ID, | ||
| Type: openai.ChatCompletionMessageToolCallTypeFunction, | ||
| Function: openai.ChatCompletionMessageToolCallFunctionParam{ | ||
| Name: block.Name, | ||
| Arguments: string(argsBytes), | ||
| }, | ||
| }) | ||
|
|
||
| return toolCalls, nil | ||
| } | ||
|
|
||
| // ResponseHeaders implements [OpenAIChatCompletionTranslator.ResponseHeaders]. | ||
| func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseHeaders(_ map[string]string) ( | ||
| newHeaders []internalapi.Header, err error, | ||
|
|
@@ -692,18 +762,19 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseBody(_ map[stri | |
| Object: string(openAIconstant.ValueOf[openAIconstant.ChatCompletion]()), | ||
| Choices: make([]openai.ChatCompletionResponseChoice, 0), | ||
| } | ||
| promptTokens := anthropicResp.Usage.InputTokens + anthropicResp.Usage.CacheReadInputTokens + anthropicResp.Usage.CacheCreationInputTokens | ||
| tokenUsage = LLMTokenUsage{ | ||
| InputTokens: uint32(anthropicResp.Usage.InputTokens), //nolint:gosec | ||
| OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec | ||
| TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec | ||
| CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens), //nolint:gosec | ||
| InputTokens: uint32(promptTokens), //nolint:gosec | ||
| OutputTokens: uint32(anthropicResp.Usage.OutputTokens), //nolint:gosec | ||
| TotalTokens: uint32(promptTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec | ||
| CachedInputTokens: uint32(anthropicResp.Usage.CacheReadInputTokens + anthropicResp.Usage.CacheCreationInputTokens), //nolint:gosec | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think on the openai token usage it is only for the cache read input tokens.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. discussed offline - should be there |
||
| } | ||
| openAIResp.Usage = openai.Usage{ | ||
| CompletionTokens: int(anthropicResp.Usage.OutputTokens), | ||
| PromptTokens: int(anthropicResp.Usage.InputTokens), | ||
| TotalTokens: int(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), | ||
| PromptTokens: int(promptTokens), | ||
| TotalTokens: int(promptTokens + anthropicResp.Usage.OutputTokens), | ||
| PromptTokensDetails: &openai.PromptTokensDetails{ | ||
| CachedTokens: int(anthropicResp.Usage.CacheReadInputTokens), | ||
| CachedTokens: int(anthropicResp.Usage.CacheReadInputTokens + anthropicResp.Usage.CacheCreationInputTokens), | ||
| }, | ||
| } | ||
|
|
||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just moved it