"""Hotfix for LiteLLM: web-search handling and tool-call id management.

Stages a patched copy of LiteLLM under /tmp/patch/litellm (the original tree
at /app/litellm is never modified) and applies four text patches:

1. ``types/llms/openai.py`` -- widen the ``type`` Literal of the two
   tool-call TypedDicts so ``"web_search"`` is an accepted tool-call type.
2. ``transformation.py`` -- mask streamed ``web_search_call`` output items by
   emitting an empty content delta instead of a tool call, so Open WebUI
   never sees (and never tries to execute) the provider-side tool.
3. ``transformation.py`` -- fall back to ``output_item["id"]`` when a
   streamed tool-call chunk carries no ``call_id``.
4. ``transformation.py`` -- synthesize a ``call_id`` when converting
   assistant tool calls whose id is missing.

Every patch is plain search/replace and idempotent: re-running the script on
an already-patched copy only prints warnings and leaves the copy unchanged.

NOTE(review): the pattern strings below were reconstructed to standard
4-space-per-level litellm formatting; they must match the installed litellm
source byte-for-byte -- re-verify them after any litellm upgrade.
"""

import os
import shutil

# Staging area; rebuilt from scratch on every run so stale patches never linger.
PATCH_DIR = "/tmp/patch"
LITELLM_DIR = os.path.join(PATCH_DIR, "litellm")
SOURCE_TREE = "/app/litellm"

# ---------------------------------------------------------------------------
# Patterns for types/llms/openai.py
# ---------------------------------------------------------------------------
TOOL_CALL_CHUNK_ORIGINAL = (
    'class ChatCompletionToolCallChunk(TypedDict):  # result of /chat/completions call\n'
    '    id: Optional[str]\n'
    '    type: Literal["function"]'
)
DELTA_CHUNK_ORIGINAL = (
    "class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):\n"
    "    id: str\n"
    '    type: Literal["function"]'
)

# ---------------------------------------------------------------------------
# Patterns for completion_extras/litellm_responses_transformation/transformation.py
# ---------------------------------------------------------------------------
IMPORT_BLOCK_ORIGINAL = """        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            GenericStreamingChunk,
        )
"""
IMPORT_BLOCK_UPDATED = """        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            Delta,
            GenericStreamingChunk,
            ModelResponseStream,
            StreamingChoices,
        )
"""

# Anchor line: the web_search_call branches are inserted immediately before
# the first ("added" event) and second ("done" event) "message" branch of
# chunk_parser's elif chain.
ADDED_TARGET = '        elif output_item.get("type") == "message":'

ADDED_BLOCK = """        elif output_item.get("type") == "web_search_call":
            # handle web search call - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (added) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""

DONE_BLOCK = """        elif output_item.get("type") == "web_search_call":
            # handle web search done - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (done) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            # Do NOT set finish_reason="tool_calls" as that would signal Open WebUI to handle the tool
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""

TOOL_CALL_BLOCK_ORIGINAL = """                if function:
                    input_tool_call = {
                        "type": "function_call",
                        "call_id": tool_call["id"],
                    }
                    if "name" in function:
                        input_tool_call["name"] = function["name"]
                    if "arguments" in function:
                        input_tool_call["arguments"] = function["arguments"]
                    input_items.append(input_tool_call)
"""
TOOL_CALL_BLOCK_PATCH = """                if function:
                    call_id = tool_call.get("id") or tool_call.get("call_id")
                    if not call_id:
                        call_id = f"auto_tool_call_{len(input_items)}"
                    input_tool_call = {
                        "type": "function_call",
                        "call_id": call_id,
                    }
                    if "name" in function:
                        input_tool_call["name"] = function["name"]
                    if "arguments" in function:
                        input_tool_call["arguments"] = function["arguments"]
                    input_items.append(input_tool_call)
"""


def insert_block(
    source: str,
    block: str,
    occurrence_index: int,
    target: str = ADDED_TARGET,
) -> str:
    """Return *source* with *block* inserted before the nth *target* occurrence.

    ``occurrence_index`` is 0-based.  If *target* occurs fewer than
    ``occurrence_index + 1`` times, *source* is returned unchanged so the
    caller can detect the failed insertion by comparison.
    """
    start = -1
    search_from = 0
    for _ in range(occurrence_index + 1):
        start = source.find(target, search_from)
        if start == -1:
            # Not enough occurrences: signal "no change" to the caller.
            return source
        search_from = start + len(target)
    return source[:start] + block + source[start:]


def _stage_litellm_copy() -> None:
    """Rebuild /tmp/patch from scratch and copy the litellm tree into it."""
    if os.path.exists(PATCH_DIR):
        shutil.rmtree(PATCH_DIR)
    os.makedirs(PATCH_DIR)
    # copytree requires the destination not to exist; LITELLM_DIR is fresh.
    shutil.copytree(SOURCE_TREE, LITELLM_DIR)


def _patch_openai_types() -> None:
    """Widen both tool-call ``type`` Literals to also accept "web_search"."""
    openai_file = os.path.join(LITELLM_DIR, "types/llms/openai.py")
    with open(openai_file, "r", encoding="utf-8") as f:
        content = f.read()

    for original, label in (
        (TOOL_CALL_CHUNK_ORIGINAL, "ChatCompletionToolCallChunk"),
        (DELTA_CHUNK_ORIGINAL, "ChatCompletionDeltaToolCallChunk"),
    ):
        patched = original.replace(
            'Literal["function"]', 'Literal["function", "web_search"]'
        )
        if original in content:
            # Replace only the first match: each pattern names one class.
            content = content.replace(original, patched, 1)
        else:
            print(f"Hotfix warning: {label} pattern not found, skipping update")

    with open(openai_file, "w", encoding="utf-8") as f:
        f.write(content)


def _patch_transformation() -> None:
    """Apply the streaming/web-search/tool-call-id patches to transformation.py."""
    trans_file = os.path.join(
        LITELLM_DIR,
        "completion_extras/litellm_responses_transformation/transformation.py",
    )
    with open(trans_file, "r", encoding="utf-8") as f:
        content = f.read()

    # -- 1. Extend chunk_parser's lazy import block -------------------------
    imports_ok = True
    if IMPORT_BLOCK_ORIGINAL in content:
        content = content.replace(IMPORT_BLOCK_ORIGINAL, IMPORT_BLOCK_UPDATED, 1)
    elif IMPORT_BLOCK_UPDATED in content:
        pass  # already patched -- idempotent re-run
    else:
        imports_ok = False
        print("Hotfix warning: unexpected chunk_parser import layout, skipping Delta/ModelResponseStream import patch")

    # -- 2. Insert the web_search_call masking branches ---------------------
    # Guard: the inserted branches reference Delta / ModelResponseStream /
    # StreamingChoices.  Inserting them without the import patch above would
    # leave transformation.py raising NameError at runtime, so skip instead.
    if imports_ok:
        if 'masking web_search_call (added)' not in content:
            new_content = insert_block(content, ADDED_BLOCK, 0)
            if new_content == content:
                print("Hotfix warning: unable to find insertion point for web_search_call (added)")
            else:
                content = new_content

        if 'masking web_search_call (done)' not in content:
            # Occurrence 1: the second "message" branch handles the "done"
            # streaming event; ADDED_BLOCK contains no anchor text, so the
            # occurrence indices are stable even after the first insertion.
            new_content = insert_block(content, DONE_BLOCK, 1)
            if new_content == content:
                print("Hotfix warning: unable to find insertion point for web_search_call (done)")
            else:
                content = new_content
    else:
        print("Hotfix warning: skipping web_search_call handlers because chunk_parser imports were not patched")

    # -- 3. Streaming tool-call chunks fall back to output_item IDs ---------
    # Deliberately unbounded replace: every streamed chunk site gets the
    # fallback.
    content = content.replace(
        'id=output_item.get("call_id"),',
        'id=output_item.get("call_id") or output_item.get("id"),',
    )

    # -- 4. Guard assistant tool_call conversions when id is missing --------
    if TOOL_CALL_BLOCK_ORIGINAL in content:
        content = content.replace(TOOL_CALL_BLOCK_ORIGINAL, TOOL_CALL_BLOCK_PATCH, 1)
    elif "auto_tool_call_" not in content:
        print("Hotfix warning: assistant tool_call block not found; missing id guard not applied")

    with open(trans_file, "w", encoding="utf-8") as f:
        f.write(content)


def main() -> None:
    """Stage the litellm copy and apply all hotfixes to it."""
    _stage_litellm_copy()
    _patch_openai_types()
    _patch_transformation()
    print("Successfully applied hotfixes to /tmp/patch/litellm")


if __name__ == "__main__":
    main()