import os
import shutil

# 1. Prepare /tmp/patch with a fresh copy of the litellm source tree
PATCH_DIR = "/tmp/patch"
LITELLM_DIR = os.path.join(PATCH_DIR, "litellm")

if os.path.exists(PATCH_DIR):
    shutil.rmtree(PATCH_DIR)
os.makedirs(PATCH_DIR)
shutil.copytree("/app/litellm", LITELLM_DIR)

# 2. Patch openai.py: allow "web_search" as a tool call chunk type
openai_file = os.path.join(LITELLM_DIR, "types/llms/openai.py")
with open(openai_file, "r") as f:
    content = f.read()

tool_call_chunk_original = (
    "class ChatCompletionToolCallChunk(TypedDict):  # result of /chat/completions call\n"
    "    id: Optional[str]\n"
    '    type: Literal["function"]'
)
tool_call_chunk_patch = tool_call_chunk_original.replace(
    'Literal["function"]', 'Literal["function", "web_search"]'
)

delta_chunk_original = (
    "class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):\n"
    "    id: str\n"
    '    type: Literal["function"]'
)
delta_chunk_patch = delta_chunk_original.replace(
    'Literal["function"]', 'Literal["function", "web_search"]'
)

for original, patched, label in [
    (tool_call_chunk_original, tool_call_chunk_patch, "ChatCompletionToolCallChunk"),
    (delta_chunk_original, delta_chunk_patch, "ChatCompletionDeltaToolCallChunk"),
]:
    if original in content:
        content = content.replace(original, patched, 1)
    else:
        print(f"Hotfix warning: {label} pattern not found, skipping update")

with open(openai_file, "w") as f:
    f.write(content)

# 3. Patch transformation.py: mask web_search_call tool calls in the stream
trans_file = os.path.join(
    LITELLM_DIR,
    "completion_extras/litellm_responses_transformation/transformation.py",
)
with open(trans_file, "r") as f:
    content = f.read()

import_block_original = """
from litellm.types.utils import (
    ChatCompletionToolCallChunk,
    GenericStreamingChunk,
)
"""
import_block_updated = """
from litellm.types.utils import (
    ChatCompletionToolCallChunk,
    Delta,
    GenericStreamingChunk,
    ModelResponseStream,
    StreamingChoices,
)
"""
if import_block_original in content:
    content = content.replace(import_block_original, import_block_updated, 1)
elif import_block_updated in content:
    pass  # already patched
else:
    print(
        "Hotfix warning: unexpected chunk_parser import layout, "
        "skipping Delta/ModelResponseStream import patch"
    )

added_block = """
        elif output_item.get("type") == "web_search_call":
            # handle web search call - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (added) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""

done_block = """
        elif output_item.get("type") == "web_search_call":
            # handle web search done - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (done) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            # Do NOT set finish_reason="tool_calls" as that would signal Open WebUI
            # to handle the tool
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""

added_target = '        elif output_item.get("type") == "message":'


def insert_block(source: str, block: str, occurrence_index: int) -> str:
    """Insert block before the nth occurrence (0-based) of added_target."""
    start = -1
    search_from = 0
    for _ in range(occurrence_index + 1):
        start = source.find(added_target, search_from)
        if start == -1:
            return source
        search_from = start + len(added_target)
    return source[:start] + block + source[start:]


if 'masking web_search_call (added)' not in content:
    new_content = insert_block(content, added_block, 0)
    if new_content == content:
        print("Hotfix warning: unable to find insertion point for web_search_call (added)")
    else:
        content = new_content

if 'masking web_search_call (done)' not in content:
    new_content = insert_block(content, done_block, 1)
    if new_content == content:
        print("Hotfix warning: unable to find insertion point for web_search_call (done)")
    else:
        content = new_content

# 4. Ensure streaming tool call chunks fall back to output_item IDs
call_id_pattern = 'id=output_item.get("call_id"),'
call_id_patch = 'id=output_item.get("call_id") or output_item.get("id"),'
if call_id_pattern in content:
    content = content.replace(call_id_pattern, call_id_patch)

# 5. Guard assistant tool_call conversions when id is missing
tool_call_block_original = """
                if function:
                    input_tool_call = {
                        "type": "function_call",
                        "call_id": tool_call["id"],
                    }
                    if "name" in function:
                        input_tool_call["name"] = function["name"]
                    if "arguments" in function:
                        input_tool_call["arguments"] = function["arguments"]
                    input_items.append(input_tool_call)
"""
tool_call_block_patch = """
                if function:
                    call_id = tool_call.get("id") or tool_call.get("call_id")
                    if not call_id:
                        call_id = f"auto_tool_call_{len(input_items)}"
                    input_tool_call = {
                        "type": "function_call",
                        "call_id": call_id,
                    }
                    if "name" in function:
                        input_tool_call["name"] = function["name"]
                    if "arguments" in function:
                        input_tool_call["arguments"] = function["arguments"]
                    input_items.append(input_tool_call)
"""
if tool_call_block_original in content:
    content = content.replace(tool_call_block_original, tool_call_block_patch, 1)
elif "auto_tool_call_" not in content:
    print("Hotfix warning: assistant tool_call block not found; missing id guard not applied")

with open(trans_file, "w") as f:
    f.write(content)

print("Successfully applied hotfixes to /tmp/patch/litellm")
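
# Optional sanity check (a sketch, not required for the hotfix itself):
# byte-compile the patched files so that any syntax error introduced by the
# string surgery above surfaces here rather than when litellm first imports
# them. py_compile is stdlib; doraise=True raises py_compile.PyCompileError
# on failure instead of printing and continuing.
import py_compile

for patched_file in (openai_file, trans_file):
    py_compile.compile(patched_file, doraise=True)
print("Patched files byte-compile cleanly")

# To run against the patched copy, put /tmp/patch ahead of the installed
# package on the import path, e.g.:
#   PYTHONPATH=/tmp/patch litellm --config config.yaml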