deploy: e9dfd966ba
hotfix.py (new file, 186 lines)
@@ -0,0 +1,186 @@
import sys
import os
import shutil
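
# Overview: this hotfix copies the installed litellm package from /app/litellm
# to /tmp/patch/litellm and patches it so that "web_search" tool calls coming
# back from the Responses API are accepted and masked before they reach
# Open WebUI. Each patch step first checks whether its target pattern is still
# present and degrades to a warning (or a silent skip) instead of corrupting
# the file when the upstream source changes shape.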

# 1. Prepare /tmp/patch
PATCH_DIR = "/tmp/patch"
LITELLM_DIR = os.path.join(PATCH_DIR, "litellm")

if os.path.exists(PATCH_DIR):
    shutil.rmtree(PATCH_DIR)
os.makedirs(PATCH_DIR)

shutil.copytree("/app/litellm", LITELLM_DIR)
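
# Note: /app/litellm itself is never modified; wiring the patched copy at
# /tmp/patch into the running service (e.g. via a bind mount or an adjusted
# import path) is a deployment detail outside this script.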

# 2. Patch openai.py
openai_file = os.path.join(LITELLM_DIR, "types/llms/openai.py")
with open(openai_file, "r") as f:
    content = f.read()
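
# Both TypedDicts below declare `type` as Literal["function"]; the patch
# widens that to Literal["function", "web_search"] so streaming web_search
# tool-call chunks validate instead of failing.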

tool_call_chunk_original = (
    'class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call\n'
    '    id: Optional[str]\n'
    '    type: Literal["function"]'
)
tool_call_chunk_patch = tool_call_chunk_original.replace(
    'Literal["function"]', 'Literal["function", "web_search"]'
)
delta_chunk_original = (
    "class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):\n"
    "    id: str\n"
    '    type: Literal["function"]'
)
delta_chunk_patch = delta_chunk_original.replace(
    'Literal["function"]', 'Literal["function", "web_search"]'
)

for original, patched, label in [
    (tool_call_chunk_original, tool_call_chunk_patch, "ChatCompletionToolCallChunk"),
    (delta_chunk_original, delta_chunk_patch, "ChatCompletionDeltaToolCallChunk"),
]:
    if original in content:
        content = content.replace(original, patched, 1)
    else:
        print(f"Hotfix warning: {label} pattern not found, skipping update")

with open(openai_file, "w") as f:
    f.write(content)

# 3. Patch transformation.py
trans_file = os.path.join(LITELLM_DIR, "completion_extras/litellm_responses_transformation/transformation.py")
with open(trans_file, "r") as f:
    content = f.read()

import_block_original = """        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            GenericStreamingChunk,
        )
"""

import_block_updated = """        from litellm.types.utils import (
            ChatCompletionToolCallChunk,
            Delta,
            GenericStreamingChunk,
            ModelResponseStream,
            StreamingChoices,
        )
"""

if import_block_original in content:
    content = content.replace(import_block_original, import_block_updated, 1)
elif import_block_updated in content:
    pass
else:
    print("Hotfix warning: unexpected chunk_parser import layout, skipping Delta/ModelResponseStream import patch")

added_block = """        elif output_item.get("type") == "web_search_call":
            # handle web search call - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (added) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""

done_block = """        elif output_item.get("type") == "web_search_call":
            # handle web search done - mask tool call by emitting empty content delta
            # This prevents Open WebUI from seeing tool_calls and trying to execute them
            action_payload = output_item.get("action")
            verbose_logger.debug(
                "Chat provider: masking web_search_call (done) call_id=%s action=%s",
                output_item.get("call_id"),
                action_payload,
            )
            # Emit empty content delta instead of tool_call to mask the tool usage
            # Do NOT set finish_reason="tool_calls" as that would signal Open WebUI to handle the tool
            return ModelResponseStream(
                choices=[
                    StreamingChoices(
                        index=0,
                        delta=Delta(content=""),
                        finish_reason=None,
                    )
                ]
            )
"""

added_target = '        elif output_item.get("type") == "message":'

def insert_block(source: str, block: str, occurrence_index: int) -> str:
    """Insert block before the nth occurrence (0-based) of added_target."""
    start = -1
    search_from = 0
    for _ in range(occurrence_index + 1):
        start = source.find(added_target, search_from)
        if start == -1:
            return source
        search_from = start + len(added_target)
    return source[:start] + block + source[start:]
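
# For example, insert_block(content, done_block, 1) places done_block
# immediately before the second `elif ... == "message":` branch in the file;
# if there are fewer occurrences, the source is returned unchanged.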

if 'masking web_search_call (added)' not in content:
    new_content = insert_block(content, added_block, 0)
    if new_content == content:
        print("Hotfix warning: unable to find insertion point for web_search_call (added)")
    else:
        content = new_content

if 'masking web_search_call (done)' not in content:
    new_content = insert_block(content, done_block, 1)
    if new_content == content:
        print("Hotfix warning: unable to find insertion point for web_search_call (done)")
    else:
        content = new_content
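
# The marker checks above make the insertions safe to re-apply: if upstream
# litellm ever ships these branches itself, both lookups find the marker text
# and the script skips the injection.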

# 4. Ensure streaming tool call chunks fall back to output_item IDs
call_id_pattern = 'id=output_item.get("call_id"),'
call_id_patch = 'id=output_item.get("call_id") or output_item.get("id"),'
if call_id_pattern in content:
    content = content.replace(call_id_pattern, call_id_patch)

# 5. Guard assistant tool_call conversions when id is missing
tool_call_block_original = """        if function:
            input_tool_call = {
                "type": "function_call",
                "call_id": tool_call["id"],
            }
            if "name" in function:
                input_tool_call["name"] = function["name"]
            if "arguments" in function:
                input_tool_call["arguments"] = function["arguments"]
            input_items.append(input_tool_call)
"""
tool_call_block_patch = """        if function:
            call_id = tool_call.get("id") or tool_call.get("call_id")
            if not call_id:
                call_id = f"auto_tool_call_{len(input_items)}"
            input_tool_call = {
                "type": "function_call",
                "call_id": call_id,
            }
            if "name" in function:
                input_tool_call["name"] = function["name"]
            if "arguments" in function:
                input_tool_call["arguments"] = function["arguments"]
            input_items.append(input_tool_call)
"""
if tool_call_block_original in content:
    content = content.replace(tool_call_block_original, tool_call_block_patch, 1)
elif "auto_tool_call_" not in content:
    print("Hotfix warning: assistant tool_call block not found; missing id guard not applied")

with open(trans_file, "w") as f:
    f.write(content)

print("Successfully applied hotfixes to /tmp/patch/litellm")