"""Hotfix for LiteLLM: web-search handling and tool-call id management.

Stages a patched copy of LiteLLM under /tmp/patch/litellm (the original tree
at /app/litellm is never modified) and applies four text patches:

1. ``types/llms/openai.py`` -- widen the ``type`` Literal of the two
   tool-call TypedDicts so ``"web_search"`` is an accepted tool-call type.
2. ``transformation.py`` -- mask streamed ``web_search_call`` output items by
   emitting an empty content delta instead of a tool call, so Open WebUI
   never sees (and never tries to execute) the provider-side tool.
3. ``transformation.py`` -- fall back to ``output_item["id"]`` when a
   streamed tool-call chunk carries no ``call_id``.
4. ``transformation.py`` -- synthesize a ``call_id`` when converting
   assistant tool calls whose id is missing.

Every patch is plain search/replace and idempotent: re-running the script on
an already-patched copy only prints warnings and leaves the copy unchanged.

NOTE(review): the pattern strings below were reconstructed to standard
4-space-per-level litellm formatting; they must match the installed litellm
source byte-for-byte -- re-verify them after any litellm upgrade.
"""

import os
import shutil

# Staging area; rebuilt from scratch on every run so stale patches never linger.
PATCH_DIR = "/tmp/patch"
LITELLM_DIR = os.path.join(PATCH_DIR, "litellm")
SOURCE_TREE = "/app/litellm"

# ---------------------------------------------------------------------------
# Patterns for types/llms/openai.py
# ---------------------------------------------------------------------------
TOOL_CALL_CHUNK_ORIGINAL = (
    'class ChatCompletionToolCallChunk(TypedDict):  # result of /chat/completions call\n'
    '    id: Optional[str]\n'
    '    type: Literal["function"]'
)
DELTA_CHUNK_ORIGINAL = (
    "class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):\n"
    "    id: str\n"
    '    type: Literal["function"]'
)

# ---------------------------------------------------------------------------
# Patterns for completion_extras/litellm_responses_transformation/transformation.py
# ---------------------------------------------------------------------------
IMPORT_BLOCK_ORIGINAL = """        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            GenericStreamingChunk,
        )
"""
IMPORT_BLOCK_UPDATED = """        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            Delta,
            GenericStreamingChunk,
            ModelResponseStream,
            StreamingChoices,
        )
"""

# Anchor line: the web_search_call branches are inserted immediately before
# the first ("added" event) and second ("done" event) "message" branch of
# chunk_parser's elif chain.
ADDED_TARGET = '        elif output_item.get("type") == "message":'

ADDED_BLOCK = """        elif output_item.get("type") == "web_search_call":
            # handle web search call - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (added) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""

DONE_BLOCK = """        elif output_item.get("type") == "web_search_call":
            # handle web search done - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (done) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            # Do NOT set finish_reason="tool_calls" as that would signal Open WebUI to handle the tool
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""

TOOL_CALL_BLOCK_ORIGINAL = """                if function:
                    input_tool_call = {
                        "type": "function_call",
                        "call_id": tool_call["id"],
                    }
                    if "name" in function:
                        input_tool_call["name"] = function["name"]
                    if "arguments" in function:
                        input_tool_call["arguments"] = function["arguments"]
                    input_items.append(input_tool_call)
"""
TOOL_CALL_BLOCK_PATCH = """                if function:
                    call_id = tool_call.get("id") or tool_call.get("call_id")
                    if not call_id:
                        call_id = f"auto_tool_call_{len(input_items)}"
                    input_tool_call = {
                        "type": "function_call",
                        "call_id": call_id,
                    }
                    if "name" in function:
                        input_tool_call["name"] = function["name"]
                    if "arguments" in function:
                        input_tool_call["arguments"] = function["arguments"]
                    input_items.append(input_tool_call)
"""


def insert_block(
    source: str,
    block: str,
    occurrence_index: int,
    target: str = ADDED_TARGET,
) -> str:
    """Return *source* with *block* inserted before the nth *target* occurrence.

    ``occurrence_index`` is 0-based.  If *target* occurs fewer than
    ``occurrence_index + 1`` times, *source* is returned unchanged so the
    caller can detect the failed insertion by comparison.
    """
    start = -1
    search_from = 0
    for _ in range(occurrence_index + 1):
        start = source.find(target, search_from)
        if start == -1:
            # Not enough occurrences: signal "no change" to the caller.
            return source
        search_from = start + len(target)
    return source[:start] + block + source[start:]


def _stage_litellm_copy() -> None:
    """Rebuild /tmp/patch from scratch and copy the litellm tree into it."""
    if os.path.exists(PATCH_DIR):
        shutil.rmtree(PATCH_DIR)
    os.makedirs(PATCH_DIR)
    # copytree requires the destination not to exist; LITELLM_DIR is fresh.
    shutil.copytree(SOURCE_TREE, LITELLM_DIR)


def _patch_openai_types() -> None:
    """Widen both tool-call ``type`` Literals to also accept "web_search"."""
    openai_file = os.path.join(LITELLM_DIR, "types/llms/openai.py")
    with open(openai_file, "r", encoding="utf-8") as f:
        content = f.read()

    for original, label in (
        (TOOL_CALL_CHUNK_ORIGINAL, "ChatCompletionToolCallChunk"),
        (DELTA_CHUNK_ORIGINAL, "ChatCompletionDeltaToolCallChunk"),
    ):
        patched = original.replace(
            'Literal["function"]', 'Literal["function", "web_search"]'
        )
        if original in content:
            # Replace only the first match: each pattern names one class.
            content = content.replace(original, patched, 1)
        else:
            print(f"Hotfix warning: {label} pattern not found, skipping update")

    with open(openai_file, "w", encoding="utf-8") as f:
        f.write(content)


def _patch_transformation() -> None:
    """Apply the streaming/web-search/tool-call-id patches to transformation.py."""
    trans_file = os.path.join(
        LITELLM_DIR,
        "completion_extras/litellm_responses_transformation/transformation.py",
    )
    with open(trans_file, "r", encoding="utf-8") as f:
        content = f.read()

    # -- 1. Extend chunk_parser's lazy import block -------------------------
    imports_ok = True
    if IMPORT_BLOCK_ORIGINAL in content:
        content = content.replace(IMPORT_BLOCK_ORIGINAL, IMPORT_BLOCK_UPDATED, 1)
    elif IMPORT_BLOCK_UPDATED in content:
        pass  # already patched -- idempotent re-run
    else:
        imports_ok = False
        print("Hotfix warning: unexpected chunk_parser import layout, skipping Delta/ModelResponseStream import patch")

    # -- 2. Insert the web_search_call masking branches ---------------------
    # Guard: the inserted branches reference Delta / ModelResponseStream /
    # StreamingChoices.  Inserting them without the import patch above would
    # leave transformation.py raising NameError at runtime, so skip instead.
    if imports_ok:
        if 'masking web_search_call (added)' not in content:
            new_content = insert_block(content, ADDED_BLOCK, 0)
            if new_content == content:
                print("Hotfix warning: unable to find insertion point for web_search_call (added)")
            else:
                content = new_content

        if 'masking web_search_call (done)' not in content:
            # Occurrence 1: the second "message" branch handles the "done"
            # streaming event; ADDED_BLOCK contains no anchor text, so the
            # occurrence indices are stable even after the first insertion.
            new_content = insert_block(content, DONE_BLOCK, 1)
            if new_content == content:
                print("Hotfix warning: unable to find insertion point for web_search_call (done)")
            else:
                content = new_content
    else:
        print("Hotfix warning: skipping web_search_call handlers because chunk_parser imports were not patched")

    # -- 3. Streaming tool-call chunks fall back to output_item IDs ---------
    # Deliberately unbounded replace: every streamed chunk site gets the
    # fallback.
    content = content.replace(
        'id=output_item.get("call_id"),',
        'id=output_item.get("call_id") or output_item.get("id"),',
    )

    # -- 4. Guard assistant tool_call conversions when id is missing --------
    if TOOL_CALL_BLOCK_ORIGINAL in content:
        content = content.replace(TOOL_CALL_BLOCK_ORIGINAL, TOOL_CALL_BLOCK_PATCH, 1)
    elif "auto_tool_call_" not in content:
        print("Hotfix warning: assistant tool_call block not found; missing id guard not applied")

    with open(trans_file, "w", encoding="utf-8") as f:
        f.write(content)


def main() -> None:
    """Stage the litellm copy and apply all hotfixes to it."""
    _stage_litellm_copy()
    _patch_openai_types()
    _patch_transformation()
    print("Successfully applied hotfixes to /tmp/patch/litellm")


if __name__ == "__main__":
    main()