From 276b5c775578d052692ad6ab67a2eee67cae6910 Mon Sep 17 00:00:00 2001
From: wucm667 <stevenwucongmin@gmail.com>
Date: Tue, 19 May 2026 20:03:16 +0800
Subject: [PATCH] fix(apicompat): strip temperature/top_p for reasoning models
 in Responses conversion

gpt-5.x models served via the OpenAI Responses API reject requests that
include temperature or top_p with:
  {"detail":"Unsupported parameter: temperature"}

This caused Claude Code agent/subagent tool requests to fail with a 400
error when an OpenAI group had Messages-format support enabled.

Root cause: AnthropicToResponses and ChatCompletionsToResponses were
unconditionally forwarding temperature and top_p from the incoming
request to the ResponsesRequest, even though all gpt-5.x reasoning
models reject these sampling parameters.

Fix:
- Add isReasoningModel(model string) bool helper that returns true for
  any model whose name starts with "gpt-5".
- Skip temperature and top_p when converting to ResponsesRequest for
  reasoning models. Non-reasoning models (e.g. gpt-4o) are unaffected.
- ResponsesRequest.Temperature and TopP are already *float64 with
  omitempty, so nil values are safely omitted from the JSON body.

Tests:
- TestAnthropicToResponses_TemperatureStrippedForReasoningModel
- TestAnthropicToResponses_TemperatureStrippedForAllGpt5Variants
- TestChatCompletionsToResponses_TemperatureStrippedForReasoningModel
- TestChatCompletionsToResponses_TemperaturePreservedForNonReasoningModel

Fixes #2487
---
 .../pkg/apicompat/anthropic_responses_test.go | 46 +++++++++++++++++++
 .../pkg/apicompat/anthropic_to_responses.go   | 27 ++++++++---
 .../chatcompletions_responses_test.go         | 42 +++++++++++++++++
 .../apicompat/chatcompletions_to_responses.go |  9 +++-
 4 files changed, 116 insertions(+), 8 deletions(-)

diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go
index 3a0a3bc6..7490654d 100644
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -1524,3 +1524,49 @@ func TestAnthropicToResponses_ToolWithNilSchema(t *testing.T) {
 	assert.JSONEq(t, `"object"`, string(params["type"]))
 	assert.JSONEq(t, `{}`, string(params["properties"]))
 }
+
+// ---------------------------------------------------------------------------
+// isReasoningModel / temperature-stripping tests
+// ---------------------------------------------------------------------------
+
+func TestAnthropicToResponses_TemperatureStrippedForReasoningModel(t *testing.T) {
+	temp := 0.7 // non-nil value reused for both Temperature and TopP
+	req := &AnthropicRequest{
+		Model:       "gpt-5.2", // starts with "gpt-5", so isReasoningModel reports true
+		MaxTokens:   1024,
+		Messages:    []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+		Temperature: &temp,
+		TopP:        &temp,
+	}
+
+	resp, err := AnthropicToResponses(req)
+	require.NoError(t, err)
+	assert.Nil(t, resp.Temperature, "reasoning model: temperature must be stripped")
+	assert.Nil(t, resp.TopP, "reasoning model: top_p must be stripped")
+
+	// Neither key may appear in the serialised JSON of the converted request.
+	b, err := json.Marshal(resp)
+	require.NoError(t, err)
+	assert.NotContains(t, string(b), `"temperature"`)
+	assert.NotContains(t, string(b), `"top_p"`)
+}
+
+func TestAnthropicToResponses_TemperatureStrippedForAllGpt5Variants(t *testing.T) {
+	temp := 1.0 // non-nil so the assertions observe stripping rather than an unset field
+	models := []string{"gpt-5.2", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.5"} // every name starts with "gpt-5"
+	for _, model := range models {
+		t.Run(model, func(t *testing.T) { // one subtest per model name
+			req := &AnthropicRequest{
+				Model:       model,
+				MaxTokens:   1024,
+				Messages:    []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+				Temperature: &temp,
+				TopP:        &temp,
+			}
+			resp, err := AnthropicToResponses(req)
+			require.NoError(t, err)
+			assert.Nil(t, resp.Temperature, "model %s: temperature must be stripped", model)
+			assert.Nil(t, resp.TopP, "model %s: top_p must be stripped", model)
+		})
+	}
+}
diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go
index 5f04004d..e2011bee 100644
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -22,12 +22,19 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) {
 	}
 
 	out := &ResponsesRequest{
-		Model:       req.Model,
-		Input:       inputJSON,
-		Temperature: req.Temperature,
-		TopP:        req.TopP,
-		Stream:      req.Stream,
-		Include:     []string{"reasoning.encrypted_content"},
+		Model:   req.Model,
+		Input:   inputJSON,
+		Stream:  req.Stream,
+		Include: []string{"reasoning.encrypted_content"},
+	}
+
+	// Reasoning models (gpt-5.x) served via the Responses API do not accept
+	// sampling parameters. Sending temperature or top_p causes a 400
+	// "Unsupported parameter" error, so we only forward them for non-reasoning
+	// models.
+	if !isReasoningModel(req.Model) {
+		out.Temperature = req.Temperature
+		out.TopP = req.TopP
 	}
 
 	storeFalse := false
@@ -437,6 +444,14 @@ func boolPtr(v bool) *bool {
 	return &v
 }
 
+// isReasoningModel reports whether model is treated as a reasoning model whose
+// Responses API requests must omit temperature and top_p (the upstream rejects
+// them with "Unsupported parameter: temperature"). The check is a
+// case-sensitive prefix match: every name starting with "gpt-5" qualifies.
+func isReasoningModel(model string) bool {
+	return strings.HasPrefix(model, "gpt-5")
+}
+
 // normalizeToolParameters ensures the tool parameter schema is valid for
 // OpenAI's Responses API, which requires "properties" on object schemas.
 //
diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
index 25f5c475..ad26f273 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
@@ -331,6 +331,48 @@ func TestChatCompletionsToResponses_ServiceTier(t *testing.T) {
 	assert.Equal(t, "flex", resp.ServiceTier)
 }
 
+// ---------------------------------------------------------------------------
+// temperature / top_p stripping for reasoning models
+// ---------------------------------------------------------------------------
+
+func TestChatCompletionsToResponses_TemperatureStrippedForReasoningModel(t *testing.T) {
+	temp := 0.7 // non-nil value reused for both Temperature and TopP
+	req := &ChatCompletionsRequest{
+		Model:       "gpt-5.2", // starts with "gpt-5", so isReasoningModel reports true
+		Messages:    []ChatMessage{{Role: "user", Content: json.RawMessage(`"Hi"`)}},
+		Temperature: &temp,
+		TopP:        &temp,
+	}
+
+	resp, err := ChatCompletionsToResponses(req)
+	require.NoError(t, err)
+	assert.Nil(t, resp.Temperature, "reasoning model: temperature must be stripped")
+	assert.Nil(t, resp.TopP, "reasoning model: top_p must be stripped")
+
+	// Neither key may appear in the serialised request body sent to the upstream.
+	b, err := json.Marshal(resp)
+	require.NoError(t, err)
+	assert.NotContains(t, string(b), `"temperature"`)
+	assert.NotContains(t, string(b), `"top_p"`)
+}
+
+func TestChatCompletionsToResponses_TemperaturePreservedForNonReasoningModel(t *testing.T) {
+	temp := 0.7 // non-nil value reused for both Temperature and TopP
+	req := &ChatCompletionsRequest{
+		Model:       "gpt-4o", // does not start with "gpt-5", so isReasoningModel reports false
+		Messages:    []ChatMessage{{Role: "user", Content: json.RawMessage(`"Hi"`)}},
+		Temperature: &temp,
+		TopP:        &temp,
+	}
+
+	resp, err := ChatCompletionsToResponses(req)
+	require.NoError(t, err)
+	require.NotNil(t, resp.Temperature, "non-reasoning model: temperature must be preserved") // require: the dereference below needs a non-nil pointer
+	assert.InDelta(t, 0.7, *resp.Temperature, 1e-9)
+	require.NotNil(t, resp.TopP, "non-reasoning model: top_p must be preserved") // require: the dereference below needs a non-nil pointer
+	assert.InDelta(t, 0.7, *resp.TopP, 1e-9)
+}
+
 func TestChatCompletionsToResponses_AssistantWithTextAndToolCalls(t *testing.T) {
 	req := &ChatCompletionsRequest{
 		Model: "gpt-4o",
diff --git a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
index fe2c150b..463bdd0d 100644
--- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
@@ -30,13 +30,18 @@ func ChatCompletionsToResponses(req *ChatCompletionsRequest) (*ResponsesRequest,
 		Model:        req.Model,
 		Instructions: req.Instructions,
 		Input:        inputJSON,
-		Temperature:  req.Temperature,
-		TopP:         req.TopP,
 		Stream:       true, // upstream always streams
 		Include:      []string{"reasoning.encrypted_content"},
 		ServiceTier:  req.ServiceTier,
 	}
 
+	// Reasoning models (gpt-5.x) do not accept sampling parameters.
+	// See isReasoningModel in anthropic_to_responses.go.
+	if !isReasoningModel(req.Model) {
+		out.Temperature = req.Temperature
+		out.TopP = req.TopP
+	}
+
 	storeFalse := false
 	out.Store = &storeFalse