Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ require (
github.com/envoyproxy/gateway v1.5.0
github.com/envoyproxy/go-control-plane v0.13.5-0.20250622153809-434b6986176d
github.com/envoyproxy/go-control-plane/envoy v1.32.5-0.20250622153809-434b6986176d
github.com/fatih/structs v1.1.0
github.com/go-logr/logr v1.4.3
github.com/google/cel-go v0.26.0
github.com/google/go-cmp v0.7.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,8 @@ github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc=
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo=
github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
github.com/fatih/structtag v1.2.0 h1:/OdNE99OxoI/PqaW/SuSK9uxxT3f/tcSZgon/ssNSx4=
github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
Expand Down
38 changes: 38 additions & 0 deletions internal/apischema/awsbedrock/awsbedrock.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,11 @@ type ConverseInput struct {
// is not in the model response, it is ignored by Converse.
AdditionalModelResponseFieldPaths []*string `json:"additionalModelResponseFieldPaths,omitempty"`

// Additional inference parameters that the model supports, beyond the base set of inference parameters
// that Converse and ConverseStream support in the inferenceConfig field.
// For more information, see https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html.
AdditionalModelRequestFields map[string]interface{} `json:"additionalModelRequestFieldPaths,omitempty"`

// Configuration information for a guardrail that you want to use in the request.
GuardrailConfig *GuardrailConfiguration `json:"guardrailConfig,omitempty"`

Expand Down Expand Up @@ -292,6 +297,33 @@ type ToolUseBlock struct {
ToolUseID string `json:"toolUseId"`
}

// ReasoningTextBlock contains the reasoning text and an optional signature.
// See: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ReasoningTextBlock.html
type ReasoningTextBlock struct {
// The reasoning that the model used to return the output.
// Always written under the "text" key, even when empty (the tag has no omitempty).
Text string `json:"text"`
// A token that verifies that the reasoning text was generated by the model.
// Optional: a nil pointer is left out of the encoded JSON.
Signature *string `json:"signature,omitempty"`
}

// RedactedContentBlock contains content that has been redacted.
// Its shape mirrors ReasoningTextBlock (a text plus an optional signature); it is
// not taken from a dedicated AWS API reference page.
// NOTE(review): the AWS ReasoningContentBlock reference appears to describe
// redactedContent as a base64-encoded binary blob rather than an object — confirm
// that decoding a real Bedrock response into this struct works before relying on it.
type RedactedContentBlock struct {
// The redacted text.
// Always written under the "text" key, even when empty (the tag has no omitempty).
Text string `json:"text"`
// A token that verifies the redaction.
// Optional: a nil pointer is left out of the encoded JSON.
Signature *string `json:"signature,omitempty"`
}

// ReasoningContentBlock contains the reasoning trace for the inference that the model ran.
// Both members are optional pointers and are omitted from the JSON when nil; the
// struct itself does not enforce that only one of them is set.
// See: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ReasoningContentBlock.html
type ReasoningContentBlock struct {
// The reasoning that the model used to return the output.
ReasoningText *ReasoningTextBlock `json:"reasoningText,omitempty"`
// The content that has been redacted from the reasoning trace.
RedactedContent *RedactedContentBlock `json:"redactedContent,omitempty"`
}

// ContentBlock is defined in the AWS Bedrock API:
// https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ContentBlock.html
type ContentBlock struct {
Expand All @@ -310,6 +342,12 @@ type ContentBlock struct {

// Information about a tool use request from a model.
ToolUse *ToolUseBlock `json:"toolUse,omitempty"`

// Contains content regarding the reasoning that is carried out by the model. Reasoning refers to a Chain of Thought (CoT) that the model generates to enhance the accuracy of its final response.
// Note: This object is a Union. Only one member of this object can be specified or returned.
// Required: No
// See https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ReasoningContentBlock.html for more information.
ReasoningContent *ReasoningContentBlock `json:"reasoningContent,omitempty"`
}

// ConverseMetrics Metrics for a call to Converse (https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html).
Expand Down
105 changes: 80 additions & 25 deletions internal/extproc/translator/openai_awsbedrock.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,51 @@
"github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream"
corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"github.com/fatih/structs"
openaigo "github.com/openai/openai-go"
openAIconstant "github.com/openai/openai-go/shared/constant"
"k8s.io/utils/ptr"

"github.com/envoyproxy/ai-gateway/internal/apischema/awsbedrock"
"github.com/envoyproxy/ai-gateway/internal/apischema/openai"
tracing "github.com/envoyproxy/ai-gateway/internal/tracing/api"
)

// CustomChatCompletionMessage embeds the original openaigo.ChatCompletionMessage and
// adds ExtraFields, a bag of additional top-level keys to emit alongside the
// standard message fields.
// The type has its own MarshalJSON method, which merges ExtraFields into the
// encoded message object.
type CustomChatCompletionMessage struct {
openaigo.ChatCompletionMessage
// ExtraFields is excluded from default JSON encoding (`json:"-"`); it reaches the
// output only through the type's MarshalJSON method.
ExtraFields map[string]interface{} `json:"-"`
}

// CustomChatCompletionChoice shadows the original Message field with our custom type.
// The outer Message field is declared at a shallower depth than the embedded
// struct's fields, so Go selectors (choice.Message) resolve to it.
// NOTE(review): this presumes the embedded openaigo.ChatCompletionChoice also
// exposes a field encoded as "message", which encoding/json would then drop in
// favour of this one — confirm against the openai-go type definition.
type CustomChatCompletionChoice struct {
openaigo.ChatCompletionChoice
// Message replaces the embedded message so that ExtraFields can be emitted.
Message CustomChatCompletionMessage `json:"message"`
}

// CustomChatCompletion shadows the original Choices slice with our custom type.
// The outer Choices field is declared at a shallower depth than the embedded
// struct's fields, so Go selectors (resp.Choices) resolve to it.
// NOTE(review): this presumes the embedded openaigo.ChatCompletion also exposes a
// field encoded as "choices" that this one is meant to replace — confirm against
// the openai-go type definition.
type CustomChatCompletion struct {
openaigo.ChatCompletion
// Choices replaces the embedded choices so each one carries a CustomChatCompletionMessage.
Choices []CustomChatCompletionChoice `json:"choices"`
}

// MarshalJSON implements json.Marshaler for CustomChatCompletionMessage.
// It flattens the embedded openaigo.ChatCompletionMessage into a map and merges
// ExtraFields on top, so the extra keys are emitted as siblings of the standard
// message fields. An ExtraFields key that collides with a standard field
// overrides that field.
//
// Returns the encoded JSON object, or the error reported by json.Marshal.
func (c CustomChatCompletionMessage) MarshalJSON() ([]byte, error) {
	// structs reads the `structs` tag by default, which would key the map by Go
	// field names instead of the JSON wire names and would not skip fields
	// tagged `json:"-"`. Point it at the `json` tag so both are honoured.
	// NOTE(review): nested struct fields are also converted to maps, which
	// bypasses any custom MarshalJSON they might define — confirm none of the
	// nested openai-go types rely on one.
	s := structs.New(c.ChatCompletionMessage)
	s.TagName = "json"
	merged := s.Map()

	// Merge the extra fields; ranging over a nil map is a no-op.
	for key, value := range c.ExtraFields {
		merged[key] = value
	}

	return json.Marshal(merged)
}

// NewChatCompletionOpenAIToAWSBedrockTranslator implements [Factory] for OpenAI to AWS Bedrock translation.
func NewChatCompletionOpenAIToAWSBedrockTranslator(modelNameOverride string) OpenAIChatCompletionTranslator {
return &openAIToAWSBedrockTranslatorV1ChatCompletion{modelNameOverride: modelNameOverride}
Expand Down Expand Up @@ -82,6 +120,14 @@
bedrockReq.InferenceConfig.StopSequences = stopSequence
}

// Handle Anthropic vendor fields if present. Currently only supports thinking fields.
if openAIReq.AnthropicVendorFields != nil && openAIReq.Thinking != nil {
if bedrockReq.AdditionalModelRequestFields == nil {
bedrockReq.AdditionalModelRequestFields = make(map[string]interface{})
}
bedrockReq.AdditionalModelRequestFields["thinking"] = openAIReq.Thinking
}

// Convert Chat Completion messages.
err = o.openAIMessageToBedrockMessage(openAIReq, &bedrockReq)
if err != nil {
Expand Down Expand Up @@ -467,21 +513,20 @@

func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) bedrockToolUseToOpenAICalls(
toolUse *awsbedrock.ToolUseBlock,
) *openai.ChatCompletionMessageToolCallParam {
) openaigo.ChatCompletionMessageToolCall {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

switch to a pointer return?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We just dereference the pointer right after returning; what do you think is best to do?

if toolUse == nil {
return nil
return openaigo.ChatCompletionMessageToolCall{}
}
arguments, err := json.Marshal(toolUse.Input)
if err != nil {
return nil
return openaigo.ChatCompletionMessageToolCall{}
}
return &openai.ChatCompletionMessageToolCallParam{
ID: &toolUse.ToolUseID,
Function: openai.ChatCompletionMessageToolCallFunctionParam{
return openaigo.ChatCompletionMessageToolCall{
ID: toolUse.ToolUseID,
Function: openaigo.ChatCompletionMessageToolCallFunction{
Name: toolUse.Name,
Arguments: string(arguments),
},
Type: openai.ChatCompletionMessageToolCallTypeFunction,
}
}

Expand Down Expand Up @@ -582,40 +627,50 @@
if err = json.NewDecoder(body).Decode(&bedrockResp); err != nil {
return nil, nil, tokenUsage, fmt.Errorf("failed to unmarshal body: %w", err)
}
openAIResp := &openai.ChatCompletionResponse{
Object: "chat.completion",
Choices: make([]openai.ChatCompletionResponseChoice, 0),

openAIResp := CustomChatCompletion{
Choices: make([]CustomChatCompletionChoice, 0),
}

// Convert token usage.
if bedrockResp.Usage != nil {
tokenUsage = LLMTokenUsage{
InputTokens: uint32(bedrockResp.Usage.InputTokens), //nolint:gosec
OutputTokens: uint32(bedrockResp.Usage.OutputTokens), //nolint:gosec
TotalTokens: uint32(bedrockResp.Usage.TotalTokens), //nolint:gosec
}
openAIResp.Usage = openai.ChatCompletionResponseUsage{
TotalTokens: bedrockResp.Usage.TotalTokens,
PromptTokens: bedrockResp.Usage.InputTokens,
CompletionTokens: bedrockResp.Usage.OutputTokens,
openAIResp.Usage = openaigo.CompletionUsage{
TotalTokens: int64(bedrockResp.Usage.TotalTokens),
PromptTokens: int64(bedrockResp.Usage.InputTokens),
CompletionTokens: int64(bedrockResp.Usage.OutputTokens),
}
}

// AWS Bedrock does not support N (multiple choices) > 1, so there can be only one choice.
choice := openai.ChatCompletionResponseChoice{
Index: (int64)(0),
Message: openai.ChatCompletionResponseChoiceMessage{
Role: bedrockResp.Output.Message.Role,
choice := CustomChatCompletionChoice{
Message: CustomChatCompletionMessage{
ChatCompletionMessage: openaigo.ChatCompletionMessage{
Role: openAIconstant.Assistant(bedrockResp.Output.Message.Role),
},
ExtraFields: make(map[string]interface{}),
},
FinishReason: o.bedrockStopReasonToOpenAIStopReason(bedrockResp.StopReason),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we set the finishReason in place?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same for Index

}
choice.Index = (int64)(0)
choice.FinishReason = string(o.bedrockStopReasonToOpenAIStopReason(bedrockResp.StopReason))

for _, output := range bedrockResp.Output.Message.Content {
if toolCall := o.bedrockToolUseToOpenAICalls(output.ToolUse); toolCall != nil {
choice.Message.ToolCalls = []openai.ChatCompletionMessageToolCallParam{*toolCall}
} else if output.Text != nil {
// For the converse response the assumption is that there is only one text content block, we take the first one.
if choice.Message.Content == nil {
choice.Message.Content = output.Text
switch {
case output.ToolUse != nil:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ToolUse and Text cannot be non-nil at the same time, right?
If so, can you add a comment about it or point to the docs that state it?

toolCall := o.bedrockToolUseToOpenAICalls(output.ToolUse)
choice.Message.ToolCalls = append(choice.Message.ToolCalls, toolCall)

case output.Text != nil:
// We expect only one text content block in the response.
if choice.Message.Content == "" {
choice.Message.Content = *output.Text
}
case output.ReasoningContent != nil && output.ReasoningContent.ReasoningText != nil:
choice.Message.ExtraFields["reasoning_content"] = *output.ReasoningContent
}
}
openAIResp.Choices = append(openAIResp.Choices, choice)
Expand All @@ -627,7 +682,7 @@
headerMutation = &extprocv3.HeaderMutation{}
setContentLength(headerMutation, mut.Body)
if span != nil {
span.RecordResponse(openAIResp)

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / Check (ubuntu-latest)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse (typecheck)

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / Check (macos-latest)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse (typecheck)

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / Unit Test (ubuntu-latest)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / External Processor Test (Envoy vlatest on ubuntu-latest)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / E2E Test (Envoy Gateway v1.5.0)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / External Processor Test (Envoy v1.35.0 on macos-latest)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / E2E Test for Inference Extensions (Envoy Gateway v1.5.0)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / External Processor Test (Envoy v1.35.0 on ubuntu-latest)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / E2E Test for Inference Extensions (Envoy Gateway latest)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / E2E Test (Envoy Gateway latest)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse

Check failure on line 685 in internal/extproc/translator/openai_awsbedrock.go

View workflow job for this annotation

GitHub Actions / Unit Test (macos-latest)

cannot use openAIResp (variable of struct type CustomChatCompletion) as *"github.com/envoyproxy/ai-gateway/internal/apischema/openai".ChatCompletionResponse value in argument to span.RecordResponse
}
return headerMutation, &extprocv3.BodyMutation{Mutation: mut}, tokenUsage, nil
}
Expand Down
Loading
Loading