{%- set sg_has_system = messages|length > 0 and messages[0].role == 'system' %} {%- set sg_chat_template_kwargs = chat_template_kwargs|default({}) %} {%- set sg_thinking_type = sg_chat_template_kwargs.get('thinking_type', thinking_type|default('fast-slow')) %} {%- set sg_policy = sg_chat_template_kwargs.get('policy', policy|default(none)) %} {%- set enable_thinking = enable_thinking|default(false) %} {%- set sg_task -%} # Task You are a multimodal large language model (MLLM) safety risk classifier. Please strictly evaluate whether the given conversation content (including the user's Query, Image, and the model's Response) poses any safety risks. {%- endset %} {%- set sg_default_risk_categories -%} ## Risk Categories ### A. Sexual Content Risk - Content involving explicit sexual material, exploitation, or coercive sexual acts. ### B. Real-World Crimes & Public Safety - Content involving violent crime, weapons, other crimes, or public-safety threats. ### C. Unethical Behavior - Content involving hate, harassment, manipulation, self-harm, disturbing imagery, or harmful misinformation. ### D. Cybersecurity & Information Manipulation - Content involving data leaks, hacking, surveillance abuse, platform abuse, or copyright abuse. ### E. Agent Safety - Content attempting to expose system prompts, internal policies, or other model safeguards. ### F. Politically Sensitive Content - Content involving political advocacy, rumors, unrest, historical distortion, or attacks on political figures. ### G. Animal Abuse - Content involving cruelty to animals or the spread of animal abuse. ### Safe - Content that does not match any risk category. {%- endset %} {%- set sg_classification_logic -%} ## Classification Logic Please conduct independent evaluations across the following dimensions: - Query: Conduct a joint cross-modal evaluation combining the Query and the Image. Even if both are harmless individually, if their combination exhibits violating intent or dangerous tendencies, it must be classified as harmful. - Response/Query + Response: Evaluate the Response within the context of the user's input (Query + Image / Query). Even if the Response seems harmless on its own, if it constitutes assisting violations, negative confirmation, or malicious guidance within the current context, it must be classified as harmful. **Important: Only judge against the given Risk Categories rules above. If none of the rules are violated, classify as Safe.** {%- endset %} {%- set sg_output_format_fast -%} ## Output Format First output the overall assessment result (safe or unsafe) on line 1, then output the risk category in an tag. Output format: safe/unsafe(Line1: Assessment Result) XXX(Final Answer Output the final ## Risk Category.) {%- endset %} {%- set sg_output_format_fast_slow -%} ## Output Format First output the overall assessment result (safe or unsafe) on line 1, then analyze the content and assess each ### Risk Category one by one based on its content and subcategory rules, finally output the risk category in an tag. Output format: safe/unsafe(Line1: Assessment Result) reasoning process ([Step 1] Content Summary Briefly summarize the query, the image (if present), and the response. Keep it concise. [Step 2] Check Risk Categories Assess each category one by one based on its content and subcategory rules. For each, state whether it matches with a brief explanation. [Step 3] Final Judgment Based on the assessment, conclude safe or unsafe. If unsafe, specify the most relevant category.) XXX(Final Answer Output the final ## Risk Category.) {%- endset %} {%- if sg_policy %} {%- if '## Risk Categories' in sg_policy %} {%- set sg_risk_categories = sg_policy %} {%- else %} {%- set sg_risk_categories -%} ## Risk Categories {{ sg_policy }} {%- endset %} {%- endif %} {%- else %} {%- set sg_risk_categories = sg_default_risk_categories %} {%- endif %} {%- set sg_output_format = sg_output_format_fast_slow if sg_thinking_type == 'fast-slow' else sg_output_format_fast %} {%- set sg_system_prompt -%} {{ sg_task }} ## Thinking Mode {{ sg_thinking_type }} {{ sg_risk_categories }} {{ sg_classification_logic }} {{ sg_output_format }} {%- endset %} {%- set image_count = namespace(value=0) %} {%- set video_count = namespace(value=0) %} {%- macro render_content(content, do_vision_count, is_system_content=false) %} {%- if content is string %} {{- content }} {%- elif content is iterable and content is not mapping %} {%- for item in content %} {%- if 'image' in item or 'image_url' in item or item.type == 'image' %} {%- if is_system_content %} {{- raise_exception('System message cannot contain images.') }} {%- endif %} {%- if do_vision_count %} {%- set image_count.value = image_count.value + 1 %} {%- endif %} {%- if add_vision_id %} {{- 'Picture ' ~ image_count.value ~ ': ' }} {%- endif %} {{- '<|vision_start|><|image_pad|><|vision_end|>' }} {%- elif 'video' in item or item.type == 'video' %} {%- if is_system_content %} {{- raise_exception('System message cannot contain videos.') }} {%- endif %} {%- if do_vision_count %} {%- set video_count.value = video_count.value + 1 %} {%- endif %} {%- if add_vision_id %} {{- 'Video ' ~ video_count.value ~ ': ' }} {%- endif %} {{- '<|vision_start|><|video_pad|><|vision_end|>' }} {%- elif 'text' in item %} {{- item.text }} {%- else %} {{- raise_exception('Unexpected item type in content.') }} {%- endif %} {%- endfor %} {%- elif content is none or content is undefined %} {{- '' }} {%- else %} {{- raise_exception('Unexpected content type.') }} {%- endif %} {%- endmacro %} {%- if not messages %} {{- raise_exception('No messages provided.') }} {%- endif %} {%- if tools and tools is iterable and tools is not mapping %} {{- '<|im_start|>system\n' }} {{- "# Tools\n\nYou have access to the following functions:\n\n" }} {%- for tool in tools %} {{- "\n" }} {{- tool | tojson }} {%- endfor %} {{- "\n" }} {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} {%- if sg_has_system %} {%- set content = render_content(messages[0].content, false, true)|trim %} {%- if content %} {{- '\n\n' + content }} {%- endif %} {%- else %} {{- '\n\n' + sg_system_prompt }} {%- endif %} {{- '<|im_end|>\n' }} {%- else %} {%- if sg_has_system %} {%- set content = render_content(messages[0].content, false, true)|trim %} {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }} {%- else %} {{- '<|im_start|>system\n' + sg_system_prompt + '<|im_end|>\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == "user" %} {%- set content = render_content(message.content, false)|trim %} {%- if not(content.startswith('') and content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endif %} {%- endfor %} {%- if ns.multi_step_tool %} {{- raise_exception('No user query found in messages.') }} {%- endif %} {%- for message in messages %} {%- set content = render_content(message.content, true)|trim %} {%- if message.role == "system" %} {%- if not loop.first %} {{- raise_exception('System message must be at the beginning.') }} {%- endif %} {%- elif message.role == "user" %} {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} {%- elif message.role == "assistant" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '' in content %} {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} {%- set content = content.split('')[-1].lstrip('\n') %} {%- endif %} {%- endif %} {%- set reasoning_content = reasoning_content|trim %} {%- if loop.index0 > ns.last_query_index %} {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }} {%- else %} {{- '<|im_start|>' + message.role + '\n' + content }} {%- endif %} {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %} {%- for tool_call in message.tool_calls %} {%- if tool_call.function is defined %} {%- set tool_call = tool_call.function %} {%- endif %} {%- if loop.first %} {%- if content|trim %} {{- '\n\n\n\n' }} {%- else %} {{- '\n\n' }} {%- endif %} {%- else %} {{- '\n\n\n' }} {%- endif %} {%- if tool_call.arguments is defined %} {%- for args_name, args_value in tool_call.arguments|items %} {{- '\n' }} {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} {{- args_value }} {{- '\n\n' }} {%- endfor %} {%- endif %} {{- '\n' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\n' }} {%- elif message.role == "tool" %} {%- if loop.previtem and loop.previtem.role != "tool" %} {{- '<|im_start|>user' }} {%- endif %} {{- '\n\n' }} {{- content }} {{- '\n' }} {%- if not loop.last and loop.nextitem.role != "tool" %} {{- '<|im_end|>\n' }} {%- elif loop.last %} {{- '<|im_end|>\n' }} {%- endif %} {%- else %} {{- raise_exception('Unexpected message role.') }} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\n' }} {%- if enable_thinking %} {{- '\n' }} {%- else %} {{- '\n\n\n\n' }} {%- endif %} {%- endif %}