"""Hotfix script for litellm.

Copies the installed litellm package from /app/litellm into /tmp/patch and
patches it in place there so streaming "web_search" tool calls are tolerated
(widened TypedDict literals, masking of web_search_call chunks, and tool-call
id fallbacks). The original install under /app is never modified.
"""
import sys
import os
import shutil

# 1. Prepare /tmp/patch: the patched copy is built in a scratch directory so
# the pristine install under /app/litellm is never modified in place.
PATCH_DIR = "/tmp/patch"
LITELLM_DIR = os.path.join(PATCH_DIR, "litellm")

# Start from a clean slate on every run; a stale tree would make the
# idempotency checks further down unreliable.
if os.path.exists(PATCH_DIR):
    shutil.rmtree(PATCH_DIR)
os.makedirs(PATCH_DIR)

# copytree creates LITELLM_DIR itself (it must not already exist).
shutil.copytree("/app/litellm", LITELLM_DIR)
# 2. Patch openai.py: widen the tool-call "type" literals in the streaming
# TypedDicts so "web_search" tool calls validate alongside "function".
openai_file = os.path.join(LITELLM_DIR, "types/llms/openai.py")
with open(openai_file, "r") as f:
    content = f.read()
# Build the TypedDict patches: both widen the same literal, so the
# replacement pair is declared once and applied to each original snippet.
# NOTE(review): the whitespace inside these literals must byte-match the
# target file — verify against the installed litellm version.
_WIDEN_FROM = 'Literal["function"]'
_WIDEN_TO = 'Literal["function", "web_search"]'

tool_call_chunk_original = (
    'class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call\n'
    '    id: Optional[str]\n'
    '    type: Literal["function"]'
)
tool_call_chunk_patch = tool_call_chunk_original.replace(_WIDEN_FROM, _WIDEN_TO)

delta_chunk_original = (
    "class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):\n"
    "    id: str\n"
    '    type: Literal["function"]'
)
delta_chunk_patch = delta_chunk_original.replace(_WIDEN_FROM, _WIDEN_TO)
# Apply each TypedDict widening at most once; warn (rather than fail) when a
# pattern is missing, e.g. because upstream litellm changed the definitions.
for target, replacement, name in (
    (tool_call_chunk_original, tool_call_chunk_patch, "ChatCompletionToolCallChunk"),
    (delta_chunk_original, delta_chunk_patch, "ChatCompletionDeltaToolCallChunk"),
):
    if target not in content:
        print(f"Hotfix warning: {name} pattern not found, skipping update")
        continue
    content = content.replace(target, replacement, 1)

# Persist the widened TypedDicts back into the scratch copy.
with open(openai_file, "w") as f:
    f.write(content)
# 3. Patch transformation.py: teach the responses->chat transformation to
# mask web_search tool calls and to tolerate missing tool-call ids.
trans_file = os.path.join(LITELLM_DIR, "completion_extras/litellm_responses_transformation/transformation.py")
with open(trans_file, "r") as f:
    content = f.read()
# Import block used by chunk_parser. The updated version additionally pulls in
# Delta / ModelResponseStream / StreamingChoices, which the injected masking
# branches below construct at runtime.
# NOTE(review): the leading whitespace inside these literals must byte-match
# the target file — verify against the installed litellm version.
import_block_original = """        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            GenericStreamingChunk,
        )
"""

import_block_updated = """        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            Delta,
            GenericStreamingChunk,
            ModelResponseStream,
            StreamingChoices,
        )
"""
|
# Widen the chunk_parser imports exactly once; if the file already carries the
# updated block this run is a re-apply and nothing needs doing.
already_updated = import_block_updated in content
if import_block_original in content:
    content = content.replace(import_block_original, import_block_updated, 1)
elif not already_updated:
    print("Hotfix warning: unexpected chunk_parser import layout, skipping Delta/ModelResponseStream import patch")
# Branch injected into chunk_parser for the "output_item.added" event: a
# web_search_call is masked as an empty content delta so downstream clients
# (Open WebUI) never see a tool_call they would try to execute themselves.
# NOTE(review): the indentation inside these literals must match the insertion
# point in the target file byte-for-byte — verify against the installed
# litellm version.
added_block = """        elif output_item.get("type") == "web_search_call":
            # handle web search call - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (added) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""

# Equivalent masking branch for the "done" event. It deliberately emits
# finish_reason=None (see the inline comment in the injected code).
done_block = """        elif output_item.get("type") == "web_search_call":
            # handle web search done - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (done) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            # Do NOT set finish_reason="tool_calls" as that would signal Open WebUI to handle the tool
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""
# Anchor line the masking branches are inserted in front of.
# NOTE(review): leading whitespace must byte-match the target file.
added_target = '        elif output_item.get("type") == "message":'


def insert_block(source: str, block: str, occurrence_index: int, target: str = added_target) -> str:
    """Insert *block* immediately before the nth occurrence (0-based) of *target*.

    Generalized so the anchor is a parameter; the default preserves the
    original behavior of anchoring on ``added_target``.

    Returns *source* unchanged when *target* occurs fewer than
    ``occurrence_index + 1`` times, which callers use as a "not found" signal.
    """
    start = -1
    search_from = 0
    # Walk past occurrence_index matches; bail out early if we run out.
    for _ in range(occurrence_index + 1):
        start = source.find(target, search_from)
        if start == -1:
            return source
        search_from = start + len(target)
    return source[:start] + block + source[start:]
|
# Insert both masking branches, each at most once (the debug-message marker
# doubles as the idempotency check). The "added" branch goes before the first
# "message" anchor, the "done" branch before the second.
for label, block, occurrence in (
    ("added", added_block, 0),
    ("done", done_block, 1),
):
    if f"masking web_search_call ({label})" in content:
        continue  # already patched on a previous run
    updated = insert_block(content, block, occurrence)
    if updated == content:
        print(f"Hotfix warning: unable to find insertion point for web_search_call ({label})")
    else:
        content = updated
|
# 4. Ensure streaming tool call chunks fall back to output_item IDs when
# "call_id" is absent or falsy.
call_id_pattern = 'id=output_item.get("call_id"),'
call_id_patch = 'id=output_item.get("call_id") or output_item.get("id"),'
# str.replace is a no-op when the pattern is absent, so no guard is needed.
content = content.replace(call_id_pattern, call_id_patch)
|
# 5. Guard assistant tool_call conversions when id is missing
# The stock conversion assumes every assistant tool_call carries an "id"; the
# patched block falls back to "call_id" and finally to a synthesized
# "auto_tool_call_<n>" identifier so the conversion never raises KeyError.
# NOTE(review): the leading whitespace inside these literals must byte-match
# the target file — verify against the installed litellm version.
tool_call_block_original = """                if function:
                    input_tool_call = {
                        "type": "function_call",
                        "call_id": tool_call["id"],
                    }
                    if "name" in function:
                        input_tool_call["name"] = function["name"]
                    if "arguments" in function:
                        input_tool_call["arguments"] = function["arguments"]
                    input_items.append(input_tool_call)
"""
tool_call_block_patch = """                if function:
                    call_id = tool_call.get("id") or tool_call.get("call_id")
                    if not call_id:
                        call_id = f"auto_tool_call_{len(input_items)}"
                    input_tool_call = {
                        "type": "function_call",
                        "call_id": call_id,
                    }
                    if "name" in function:
                        input_tool_call["name"] = function["name"]
                    if "arguments" in function:
                        input_tool_call["arguments"] = function["arguments"]
                    input_items.append(input_tool_call)
"""
# Apply the missing-id guard once; "auto_tool_call_" in the file means the
# patched block is already present, so silence the warning in that case.
already_guarded = "auto_tool_call_" in content
if tool_call_block_original in content:
    content = content.replace(tool_call_block_original, tool_call_block_patch, 1)
elif not already_guarded:
    print("Hotfix warning: assistant tool_call block not found; missing id guard not applied")
|
# Persist the patched transformation module and report success.
with open(trans_file, "w") as f:
    f.write(content)

print("Successfully applied hotfixes to /tmp/patch/litellm")