Skip to content

Commit fc45fa6

Browse files
GWeale and copybara-github
authored and committed
feat: Add support for Anthropic's thinking_blocks format in LiteLLM integration
This change enables the LiteLLM adapter to correctly parse and generate Anthropic's structured "thinking_blocks" format, which includes a "signature" for each thought block. The "signature" is crucial for Anthropic models to maintain their reasoning state across multiple turns, particularly when tool calls are made. Closes #4801. Co-authored-by: George Weale <gweale@google.com> PiperOrigin-RevId: 885131757
1 parent b318eee commit fc45fa6

File tree

2 files changed

+306
-4
lines changed

2 files changed

+306
-4
lines changed

src/google/adk/models/lite_llm.py

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -384,8 +384,42 @@ def _iter_reasoning_texts(reasoning_value: Any) -> Iterable[str]:
384384
yield str(reasoning_value)
385385

386386

387+
def _is_thinking_blocks_format(reasoning_value: Any) -> bool:
388+
"""Returns True if reasoning_value is Anthropic thinking_blocks format.
389+
390+
Anthropic thinking_blocks is a list of dicts, each with 'type', 'thinking',
391+
and 'signature' keys.
392+
"""
393+
if not isinstance(reasoning_value, list) or not reasoning_value:
394+
return False
395+
first = reasoning_value[0]
396+
return isinstance(first, dict) and "signature" in first
397+
398+
387399
def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
388-
"""Converts provider reasoning payloads into Gemini thought parts."""
400+
"""Converts provider reasoning payloads into Gemini thought parts.
401+
402+
Handles Anthropic thinking_blocks (list of dicts with type/thinking/signature)
403+
by preserving the signature on each part's thought_signature field. This is
404+
required for Anthropic to maintain thinking across tool call boundaries.
405+
"""
406+
if _is_thinking_blocks_format(reasoning_value):
407+
parts: List[types.Part] = []
408+
for block in reasoning_value:
409+
if not isinstance(block, dict):
410+
continue
411+
block_type = block.get("type", "")
412+
if block_type == "redacted":
413+
continue
414+
thinking_text = block.get("thinking", "")
415+
signature = block.get("signature", "")
416+
if not thinking_text:
417+
continue
418+
part = types.Part(text=thinking_text, thought=True)
419+
if signature:
420+
part.thought_signature = signature.encode("utf-8")
421+
parts.append(part)
422+
return parts
389423
return [
390424
types.Part(text=text, thought=True)
391425
for text in _iter_reasoning_texts(reasoning_value)
@@ -396,12 +430,19 @@ def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
396430
def _extract_reasoning_value(message: Message | Delta | None) -> Any:
397431
"""Fetches the reasoning payload from a LiteLLM message.
398432
399-
Checks for both 'reasoning_content' (LiteLLM standard, used by Azure/Foundry,
400-
Ollama via LiteLLM) and 'reasoning' (used by LM Studio, vLLM).
401-
Prioritizes 'reasoning_content' when both are present.
433+
Checks for 'thinking_blocks' (Anthropic structured format with signatures),
434+
'reasoning_content' (LiteLLM standard, used by Azure/Foundry, Ollama via
435+
LiteLLM) and 'reasoning' (used by LM Studio, vLLM).
436+
Prioritizes 'thinking_blocks' when present (Anthropic models), then
437+
'reasoning_content', then 'reasoning'.
402438
"""
403439
if message is None:
404440
return None
441+
# Anthropic models return thinking_blocks with type/thinking/signature fields.
442+
# This must be preserved to maintain thinking across tool call boundaries.
443+
thinking_blocks = message.get("thinking_blocks")
444+
if thinking_blocks is not None:
445+
return thinking_blocks
405446
reasoning_content = message.get("reasoning_content")
406447
if reasoning_content is not None:
407448
return reasoning_content
@@ -835,6 +876,30 @@ async def _content_to_message_param(
835876
else final_content
836877
)
837878

879+
# For Anthropic models, rebuild thinking_blocks with signatures so that
880+
# thinking is preserved across tool call boundaries. Without this,
881+
# Anthropic silently drops thinking after the first turn.
882+
if model and _is_anthropic_model(model) and reasoning_parts:
883+
thinking_blocks = []
884+
for part in reasoning_parts:
885+
if part.text and part.thought_signature:
886+
sig = part.thought_signature
887+
if isinstance(sig, bytes):
888+
sig = sig.decode("utf-8")
889+
thinking_blocks.append({
890+
"type": "thinking",
891+
"thinking": part.text,
892+
"signature": sig,
893+
})
894+
if thinking_blocks:
895+
msg = ChatCompletionAssistantMessage(
896+
role=role,
897+
content=final_content,
898+
tool_calls=tool_calls or None,
899+
)
900+
msg["thinking_blocks"] = thinking_blocks # type: ignore[typeddict-unknown-key]
901+
return msg
902+
838903
reasoning_texts = []
839904
for part in reasoning_parts:
840905
if part.text:
@@ -1943,6 +2008,31 @@ def _build_request_log(req: LlmRequest) -> str:
19432008
"""
19442009

19452010

2011+
def _is_anthropic_model(model_string: str) -> bool:
2012+
"""Check if the model is an Anthropic Claude model accessed via LiteLLM.
2013+
2014+
Detects models using the anthropic/ provider prefix, bedrock/ models that
2015+
contain 'anthropic' or 'claude', and vertex_ai/ models that contain 'claude'.
2016+
2017+
Args:
2018+
model_string: A LiteLLM model string (e.g., "anthropic/claude-4-sonnet",
2019+
"bedrock/anthropic.claude-3-5-sonnet", "vertex_ai/claude-4-sonnet")
2020+
2021+
Returns:
2022+
True if it's an Anthropic Claude model, False otherwise.
2023+
"""
2024+
lower = model_string.lower()
2025+
if lower.startswith("anthropic/"):
2026+
return True
2027+
if lower.startswith("bedrock/"):
2028+
model_part = lower.split("/", 1)[1]
2029+
return "anthropic" in model_part or "claude" in model_part
2030+
if lower.startswith("vertex_ai/"):
2031+
model_part = lower.split("/", 1)[1]
2032+
return "claude" in model_part
2033+
return False
2034+
2035+
19462036
def _is_litellm_vertex_model(model_string: str) -> bool:
19472037
"""Check if the model is a Vertex AI model accessed via LiteLLM.
19482038

tests/unittests/models/test_litellm.py

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
from google.adk.models.lite_llm import _append_fallback_user_content_if_missing
3030
from google.adk.models.lite_llm import _content_to_message_param
31+
from google.adk.models.lite_llm import _convert_reasoning_value_to_parts
3132
from google.adk.models.lite_llm import _enforce_strict_openai_schema
3233
from google.adk.models.lite_llm import _extract_reasoning_value
3334
from google.adk.models.lite_llm import _extract_thought_signature_from_tool_call
@@ -37,6 +38,7 @@
3738
from google.adk.models.lite_llm import _get_completion_inputs
3839
from google.adk.models.lite_llm import _get_content
3940
from google.adk.models.lite_llm import _get_provider_from_model
41+
from google.adk.models.lite_llm import _is_anthropic_model
4042
from google.adk.models.lite_llm import _message_to_generate_content_response
4143
from google.adk.models.lite_llm import _MISSING_TOOL_RESULT_MESSAGE
4244
from google.adk.models.lite_llm import _model_response_to_chunk
@@ -4682,3 +4684,213 @@ def test_handles_litellm_logger_names(logger_name):
46824684
finally:
46834685
# Clean up
46844686
test_logger.removeHandler(handler)
4687+
4688+
4689+
# ── Anthropic thinking_blocks tests ─────────────────────────────


@pytest.mark.parametrize(
    "model_string,expected",
    [
        pytest.param("anthropic/claude-4-sonnet", True, id="anthropic-prefix"),
        pytest.param(
            "anthropic/claude-3-5-sonnet-20241022",
            True,
            id="anthropic-versioned",
        ),
        pytest.param("Anthropic/Claude-4-Opus", True, id="anthropic-uppercase"),
        pytest.param(
            "bedrock/anthropic.claude-3-5-sonnet",
            True,
            id="bedrock-anthropic-dot",
        ),
        pytest.param(
            "bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0",
            True,
            id="bedrock-us-anthropic",
        ),
        pytest.param("bedrock/claude-3-5-sonnet", True, id="bedrock-claude"),
        pytest.param(
            "vertex_ai/claude-3-5-sonnet@20241022", True, id="vertex-claude"
        ),
        pytest.param("openai/gpt-4o", False, id="openai-no-match"),
        pytest.param("gemini/gemini-2.5-pro", False, id="gemini-no-match"),
        pytest.param(
            "vertex_ai/gemini-2.5-flash", False, id="vertex-gemini-no-match"
        ),
        pytest.param(
            "bedrock/amazon.titan-text-express-v1",
            False,
            id="bedrock-non-anthropic",
        ),
    ],
)
def test_is_anthropic_model(model_string, expected):
  assert _is_anthropic_model(model_string) is expected
4724+
4725+
def test_extract_reasoning_value_prefers_thinking_blocks():
  """thinking_blocks takes precedence over reasoning_content."""
  blocks = [
      {"type": "thinking", "thinking": "deep thought", "signature": "sig123"},
  ]
  message = {
      "role": "assistant",
      "content": "Answer",
      "thinking_blocks": blocks,
      "reasoning_content": "flat reasoning",
  }
  # Identity check: the exact thinking_blocks object must be returned.
  assert _extract_reasoning_value(message) is blocks
4738+
4739+
4740+
def test_extract_reasoning_value_falls_back_without_thinking_blocks():
  """When thinking_blocks is absent, falls back to reasoning_content."""
  message = {
      "role": "assistant",
      "content": "Answer",
      "reasoning_content": "flat reasoning",
  }
  extracted = _extract_reasoning_value(message)
  assert extracted == "flat reasoning"
4749+
4750+
4751+
def test_convert_reasoning_value_to_parts_thinking_blocks_preserves_signature():
  """thinking_blocks format produces parts with thought_signature."""
  blocks = [
      {"type": "thinking", "thinking": "step 1", "signature": "sig_abc"},
      {"type": "thinking", "thinking": "step 2", "signature": "sig_def"},
  ]
  parts = _convert_reasoning_value_to_parts(blocks)
  assert len(parts) == 2
  first, second = parts
  assert first.text == "step 1"
  assert first.thought is True
  assert first.thought_signature == b"sig_abc"
  assert second.text == "step 2"
  assert second.thought_signature == b"sig_def"
4764+
4765+
4766+
def test_convert_reasoning_value_to_parts_skips_redacted_blocks():
  """Redacted thinking blocks are excluded from parts."""
  blocks = [
      {"type": "thinking", "thinking": "visible", "signature": "sig1"},
      {"type": "redacted", "data": "hidden"},
  ]
  parts = _convert_reasoning_value_to_parts(blocks)
  # Only the non-redacted block survives conversion.
  assert [p.text for p in parts] == ["visible"]
4775+
4776+
4777+
def test_convert_reasoning_value_to_parts_skips_empty_thinking():
  """Blocks with empty thinking text are excluded."""
  blocks = [
      {"type": "thinking", "thinking": "", "signature": "sig1"},
      {"type": "thinking", "thinking": "real thought", "signature": "sig2"},
  ]
  parts = _convert_reasoning_value_to_parts(blocks)
  # The empty-text block is dropped; only the real one remains.
  assert [p.text for p in parts] == ["real thought"]
4786+
4787+
4788+
def test_convert_reasoning_value_to_parts_flat_string_unchanged():
  """Flat string reasoning still produces thought parts without signature."""
  parts = _convert_reasoning_value_to_parts("simple reasoning text")
  assert len(parts) == 1
  (part,) = parts
  assert part.text == "simple reasoning text"
  assert part.thought is True
  assert part.thought_signature is None
4795+
4796+
4797+
@pytest.mark.asyncio
async def test_content_to_message_param_anthropic_outputs_thinking_blocks():
  """For Anthropic models, thinking_blocks are output instead of reasoning_content."""
  thought_part = types.Part(
      text="deep thought",
      thought=True,
      thought_signature=b"sig_round_trip",
  )
  content = types.Content(
      role="model",
      parts=[thought_part, types.Part(text="Hello!")],
  )
  result = await _content_to_message_param(
      content, model="anthropic/claude-4-sonnet"
  )
  assert result["role"] == "assistant"
  assert "thinking_blocks" in result
  assert result.get("reasoning_content") is None
  blocks = result["thinking_blocks"]
  assert len(blocks) == 1
  (block,) = blocks
  assert block["type"] == "thinking"
  assert block["thinking"] == "deep thought"
  assert block["signature"] == "sig_round_trip"
  assert result["content"] == "Hello!"
4823+
4824+
4825+
@pytest.mark.asyncio
async def test_content_to_message_param_non_anthropic_uses_reasoning_content():
  """For non-Anthropic models, reasoning_content is used as before."""
  content = types.Content(
      role="model",
      parts=[
          types.Part(text="thinking text", thought=True),
          types.Part(text="Answer"),
      ],
  )
  result = await _content_to_message_param(content, model="openai/gpt-4o")
  assert result["role"] == "assistant"
  assert result.get("reasoning_content") == "thinking text"
  # Non-Anthropic models must never emit the Anthropic-specific key.
  assert "thinking_blocks" not in result
4839+
4840+
4841+
@pytest.mark.asyncio
async def test_anthropic_thinking_blocks_round_trip():
  """End-to-end: thinking_blocks in response → Part → thinking_blocks out."""
  # Simulate LiteLLM response with thinking_blocks
  response_message = {
      "role": "assistant",
      "content": "Final answer",
      "thinking_blocks": [
          {
              "type": "thinking",
              "thinking": "Let me reason...",
              "signature": "abc123signature",
          },
      ],
  }

  # Step 1: Extract reasoning value
  extracted = _extract_reasoning_value(response_message)
  assert isinstance(extracted, list)

  # Step 2: Convert to parts (preserves signature)
  thought_parts = _convert_reasoning_value_to_parts(extracted)
  assert len(thought_parts) == 1
  assert thought_parts[0].thought_signature == b"abc123signature"

  # Step 3: Build Content for history
  history_content = types.Content(
      role="model",
      parts=thought_parts + [types.Part(text="Final answer")],
  )

  # Step 4: Convert back to message param for Anthropic
  result = await _content_to_message_param(
      history_content, model="anthropic/claude-4-sonnet"
  )
  (block,) = result["thinking_blocks"]
  assert block["type"] == "thinking"
  assert block["thinking"] == "Let me reason..."
  assert block["signature"] == "abc123signature"
4879+
4880+
4881+
@pytest.mark.asyncio
async def test_content_to_message_param_anthropic_no_signature_falls_back():
  """Anthropic model with thought parts but no signatures uses reasoning_content."""
  content = types.Content(
      role="model",
      parts=[
          types.Part(text="thinking without sig", thought=True),
          types.Part(text="Response"),
      ],
  )
  result = await _content_to_message_param(
      content, model="anthropic/claude-4-sonnet"
  )
  # Falls back to reasoning_content when no signatures present
  assert result.get("reasoning_content") == "thinking without sig"
  assert "thinking_blocks" not in result

0 commit comments

Comments
 (0)