"""Gradio demo for trying OpenAI generation models and reasoning models.

The app exposes two tabs:

* Generation Models -- sends temperature / top_p / max_output_tokens.
* Reasoning Models  -- sends reasoning.effort / max_output_tokens.

Both tabs call the OpenAI Responses API (``client.responses.create``) and
show either the model output or a readable error message in a textbox.
"""

import os

import gradio as gr
from openai import OpenAI

# ============================================================
# Hugging Face Spaces Secret
# ============================================================
# Add this in Hugging Face:
# Space → Settings → Secrets → New secret
#
# Name: OPENAI_API_KEY
# Value: your OpenAI API key
# ============================================================

GENERATION_MODELS = [
    "gpt-4.1",
    "gpt-4.1-mini",
    "gpt-4.1-nano",
    "gpt-4o",
    "gpt-4o-mini",
]

REASONING_MODELS = [
    "gpt-5.5",
    "gpt-5.1",
    "gpt-5-mini",
    "gpt-5-pro",
    "o3",
    "o4-mini",
]

# Defaults can be overridden through environment variables; the UI falls back
# to a hard-coded model when the override is not one of the listed choices.
DEFAULT_GENERATION_MODEL = os.getenv("OPENAI_GENERATION_MODEL", "gpt-4.1")
DEFAULT_REASONING_MODEL = os.getenv("OPENAI_REASONING_MODEL", "gpt-5.5")

# Pre-filled prompt for the "Reasoning Models" tab.
DEFAULT_REASONING_PROMPT = """A telecom company wants to build an AI customer support assistant.

They have:
- 50,000 past support tickets
- A FAQ website
- Billing policies
- A small developer team

Should they start with:
1. Simple prompt-based chatbot
2. RAG chatbot
3. Fine-tuning
4. Agent with tools

Give a practical recommendation with trade-offs."""

# Message returned (instead of calling the API) when the prompt box is blank.
EMPTY_PROMPT_MESSAGE = "Error:\nPrompt is empty. Please enter a prompt."


def get_openai_client():
    """Build an OpenAI client from the ``OPENAI_API_KEY`` environment variable.

    Returns:
        An ``OpenAI`` client configured with the key.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    api_key = os.getenv("OPENAI_API_KEY")

    if not api_key:
        raise ValueError(
            "OPENAI_API_KEY is missing. "
            "Please add it in Hugging Face Spaces → Settings → Secrets."
        )

    return OpenAI(api_key=api_key)


def extract_output_text(response):
    """Extract text safely from an OpenAI Responses API response.

    Prefers the ``output_text`` attribute when it is present and non-empty.
    Otherwise walks ``response.output[*].content[*].text`` and joins every
    non-empty text chunk with newlines.

    Returns:
        The extracted text, or an empty string when nothing was found.
    """
    direct_text = getattr(response, "output_text", None)
    if direct_text:
        return direct_text

    chunks = []
    for item in getattr(response, "output", None) or []:
        for part in getattr(item, "content", None) or []:
            text = getattr(part, "text", None)
            if text:
                chunks.append(text)

    return "\n".join(chunks).strip()


def _is_blank(prompt):
    """Return True when *prompt* is None, empty, or whitespace only."""
    return not prompt or not str(prompt).strip()


def _settings_banner(title, fields, note):
    """Render the plain-text settings header shown above the model output.

    Args:
        title: Banner title; it is underlined with dashes of the same length.
        fields: Iterable of ``(label, value)`` pairs, one per output line.
        note: Explanatory sentence printed after ``Note:``.

    Returns:
        The banner text, ending with the ``OUTPUT`` heading and a newline so
        the model output can be appended directly.
    """
    lines = [title, "-" * len(title)]
    lines.extend(f"{label}: {value}" for label, value in fields)
    lines.extend(["", f"Note: {note}", "", "OUTPUT", "------", ""])
    return "\n".join(lines)


def run_generation_model(
    prompt,
    model,
    system_message,
    temperature,
    top_p,
    max_output_tokens,
    show_settings,
):
    """Run a prompt against a normal generation model.

    These models are used for writing, summarization, rewriting, marketing
    copy, explanations, and standard chatbot-style tasks.

    Important:
    - We only pass parameters that are safe for this tab.
    - We do not pass frequency_penalty or presence_penalty.
    - We do not pass reasoning.effort here.

    Args:
        prompt: User prompt sent as ``input``.
        model: Model name sent as ``model``.
        system_message: Text sent as ``instructions``.
        temperature: Sampling temperature (converted to ``float``).
        top_p: Nucleus sampling value (converted to ``float``).
        max_output_tokens: Output token cap (converted to ``int``).
        show_settings: When true, prefix the output with a settings banner.

    Returns:
        The model output (optionally with the banner), or a string starting
        with ``Error:`` when the prompt is blank or the request fails.
    """
    # Avoid a pointless API round-trip when there is nothing to send.
    if _is_blank(prompt):
        return EMPTY_PROMPT_MESSAGE

    # Broad catch is deliberate: this is the UI boundary, and any failure
    # (missing key, bad parameter, network error) is shown in the output box.
    try:
        client = get_openai_client()

        request_params = {
            "model": model,
            "instructions": system_message,
            "input": prompt,
            "temperature": float(temperature),
            "top_p": float(top_p),
            "max_output_tokens": int(max_output_tokens),
        }

        response = client.responses.create(**request_params)
        output = extract_output_text(response) or "No output generated."

        if show_settings:
            banner = _settings_banner(
                "GENERATION SETTINGS",
                [
                    ("Model", model),
                    ("Temperature", temperature),
                    ("Top P", top_p),
                    ("Max Output Tokens", max_output_tokens),
                ],
                "Frequency penalty and presence penalty are intentionally "
                "not sent in this app to avoid unsupported-parameter errors.",
            )
            return banner + output

        return output

    except Exception as e:
        return f"Error:\n{e}"


def get_safe_reasoning_effort(model, selected_effort):
    """Map the selected reasoning effort to one the given model accepts.

    Reasoning effort support differs by model. To avoid errors:
    - gpt-5-pro only supports high.
    - gpt-5.1 supports none, low, medium, high.
    - Most other reasoning models safely use low, medium, high.

    Args:
        model: Reasoning model name.
        selected_effort: Effort chosen in the UI.

    Returns:
        ``selected_effort`` when it is allowed for ``model``; otherwise
        ``"high"`` for gpt-5-pro and ``"medium"`` for every other model.
    """
    if model == "gpt-5-pro":
        return "high"

    if model == "gpt-5.1":
        allowed = ("none", "low", "medium", "high")
    else:
        allowed = ("low", "medium", "high")

    return selected_effort if selected_effort in allowed else "medium"


def run_reasoning_model(
    prompt,
    model,
    reasoning_effort,
    max_output_tokens,
    show_settings,
):
    """Run a prompt against a reasoning model.

    These models are used for:
    - Complex analysis
    - Coding
    - Multi-step reasoning
    - Architecture decisions
    - Trade-off analysis
    - Agent planning

    Important:
    - We pass reasoning.effort here.
    - We do not pass temperature/top_p here.
    - We do not pass frequency_penalty or presence_penalty.

    Args:
        prompt: User prompt sent as ``input``.
        model: Model name sent as ``model``.
        reasoning_effort: Effort chosen in the UI; it is normalised with
            ``get_safe_reasoning_effort`` before being sent.
        max_output_tokens: Output token cap (converted to ``int``).
        show_settings: When true, prefix the output with a settings banner.

    Returns:
        The model output (optionally with the banner), or a string starting
        with ``Error:`` when the prompt is blank or the request fails.
    """
    # Avoid a pointless API round-trip when there is nothing to send.
    if _is_blank(prompt):
        return EMPTY_PROMPT_MESSAGE

    # Broad catch is deliberate: this is the UI boundary, and any failure
    # is shown in the output box rather than crashing the event handler.
    try:
        client = get_openai_client()
        safe_effort = get_safe_reasoning_effort(model, reasoning_effort)

        request_params = {
            "model": model,
            "input": prompt,
            "reasoning": {"effort": safe_effort},
            "max_output_tokens": int(max_output_tokens),
        }

        response = client.responses.create(**request_params)
        output = extract_output_text(response)

        if not output:
            output = (
                "No visible output generated. "
                "Try increasing Max Output Tokens because reasoning models use "
                "some tokens internally before producing the final answer."
            )

        if show_settings:
            banner = _settings_banner(
                "REASONING SETTINGS",
                [
                    ("Model", model),
                    ("Selected Reasoning Effort", reasoning_effort),
                    ("Used Reasoning Effort", safe_effort),
                    ("Max Output Tokens", max_output_tokens),
                ],
                "Temperature, top_p, frequency penalty, and presence penalty "
                "are intentionally not sent for reasoning models to avoid "
                "unsupported-parameter errors.",
            )
            return banner + output

        return output

    except Exception as e:
        return f"Error:\n{e}"


CSS = """
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}

.main-title {
    text-align: center;
    margin-bottom: 20px;
}

.helper-box {
    padding: 14px;
    border-radius: 12px;
    background: #f7f7f8;
    border: 1px solid #e5e7eb;
    margin-bottom: 16px;
}

.output-box textarea {
    font-family: monospace !important;
}
"""


with gr.Blocks() as demo:
    # elem_classes hooks these two blocks up to the .main-title and
    # .helper-box rules defined in CSS.
    gr.Markdown(
        """
# LLM Model Controls Demo

Part of Decoding Data Science AI Residency

A clean Gradio app for testing generation models and reasoning models separately.
""",
        elem_classes=["main-title"],
    )

    gr.Markdown(
        """
Setup: Prompting is not Enough
""",
        elem_classes=["helper-box"],
    )

    with gr.Tab("Generation Models"):
        gr.Markdown(
            "Use this tab for normal text generation tasks like LinkedIn "
            "posts, summaries, explanations, rewriting, and simple Q&A."
        )

        with gr.Row():
            with gr.Column(scale=1):
                gen_prompt = gr.Textbox(
                    lines=7,
                    label="Prompt",
                    value=(
                        "Write a short LinkedIn post explaining why business "
                        "leaders should learn AI. Maximum 120 words."
                    ),
                )

                gen_model = gr.Dropdown(
                    choices=GENERATION_MODELS,
                    label="Generation Model",
                    # Ignore an env override that is not a listed choice.
                    value=(
                        DEFAULT_GENERATION_MODEL
                        if DEFAULT_GENERATION_MODEL in GENERATION_MODELS
                        else "gpt-4.1"
                    ),
                )

                gen_system_message = gr.Textbox(
                    lines=3,
                    label="System Message",
                    value=(
                        "You are a helpful AI instructor. "
                        "Keep answers clear and practical."
                    ),
                )

                gen_temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    step=0.01,
                    value=0.7,
                    label="Temperature",
                )

                gen_top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=1.0,
                    label="Top P",
                )

                gen_max_output_tokens = gr.Slider(
                    minimum=50,
                    maximum=4000,
                    step=50,
                    value=500,
                    label="Max Output Tokens",
                )

                gen_show_settings = gr.Checkbox(
                    value=True,
                    label="Show Settings",
                )

                gen_button = gr.Button("Generate", variant="primary")

            with gr.Column(scale=1):
                gen_output = gr.Textbox(
                    lines=22,
                    label="Output",
                    elem_classes=["output-box"],
                )

        gen_button.click(
            fn=run_generation_model,
            inputs=[
                gen_prompt,
                gen_model,
                gen_system_message,
                gen_temperature,
                gen_top_p,
                gen_max_output_tokens,
                gen_show_settings,
            ],
            outputs=gen_output,
        )

    with gr.Tab("Reasoning Models"):
        gr.Markdown(
            "Use this tab for complex tasks like architecture decisions, "
            "agent planning, debugging, code reasoning, and trade-off analysis."
        )

        with gr.Row():
            with gr.Column(scale=1):
                reason_prompt = gr.Textbox(
                    lines=9,
                    label="Prompt",
                    value=DEFAULT_REASONING_PROMPT,
                )

                reason_model = gr.Dropdown(
                    choices=REASONING_MODELS,
                    label="Reasoning Model",
                    # Ignore an env override that is not a listed choice.
                    value=(
                        DEFAULT_REASONING_MODEL
                        if DEFAULT_REASONING_MODEL in REASONING_MODELS
                        else "gpt-5.5"
                    ),
                )

                # Unsupported choices are remapped per model by
                # get_safe_reasoning_effort before the request is sent.
                reason_effort = gr.Radio(
                    choices=["none", "low", "medium", "high"],
                    label="Reasoning Effort",
                    value="medium",
                )

                reason_max_output_tokens = gr.Slider(
                    minimum=100,
                    maximum=12000,
                    step=100,
                    value=2000,
                    label="Max Output Tokens",
                )

                reason_show_settings = gr.Checkbox(
                    value=True,
                    label="Show Settings",
                )

                reason_button = gr.Button("Reason", variant="primary")

            with gr.Column(scale=1):
                reason_output = gr.Textbox(
                    lines=22,
                    label="Output",
                    elem_classes=["output-box"],
                )

        reason_button.click(
            fn=run_reasoning_model,
            inputs=[
                reason_prompt,
                reason_model,
                reason_effort,
                reason_max_output_tokens,
                reason_show_settings,
            ],
            outputs=reason_output,
        )


if __name__ == "__main__":
    demo.launch(
        theme=gr.themes.Soft(),
        css=CSS,
        server_name="0.0.0.0",
        server_port=int(os.getenv("PORT", "7860")),
        debug=False,
        share=False,
    )