import os import gradio as gr from openai import OpenAI # ============================================================ # Hugging Face Spaces Secret # ============================================================ # Add this in Hugging Face: # Space → Settings → Secrets → New secret # # Name: OPENAI_API_KEY # Value: your OpenAI API key # ============================================================ GENERATION_MODELS = [ "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "gpt-4o", "gpt-4o-mini", ] REASONING_MODELS = [ "gpt-5.5", "gpt-5.1", "gpt-5-mini", "gpt-5-pro", "o3", "o4-mini", ] DEFAULT_GENERATION_MODEL = os.getenv("OPENAI_GENERATION_MODEL", "gpt-4.1") DEFAULT_REASONING_MODEL = os.getenv("OPENAI_REASONING_MODEL", "gpt-5.5") def get_openai_client(): api_key = os.getenv("OPENAI_API_KEY") if not api_key: raise ValueError( "OPENAI_API_KEY is missing. " "Please add it in Hugging Face Spaces → Settings → Secrets." ) return OpenAI(api_key=api_key) def extract_output_text(response): """ Extracts text safely from the OpenAI Responses API response. """ if hasattr(response, "output_text") and response.output_text: return response.output_text chunks = [] if hasattr(response, "output") and response.output: for item in response.output: if hasattr(item, "content") and item.content: for content in item.content: if hasattr(content, "text") and content.text: chunks.append(content.text) return "\n".join(chunks).strip() def run_generation_model( prompt, model, system_message, temperature, top_p, max_output_tokens, show_settings, ): """ Function for normal generation models only. These models are used for writing, summarization, rewriting, marketing copy, explanations, and standard chatbot-style tasks. Important: - We only pass parameters that are safe for this tab. - We do not pass frequency_penalty or presence_penalty. - We do not pass reasoning.effort here. """ try: client = get_openai_client() request_params = { "model": model, "instructions": system_message, "input": prompt, "temperature": float(temperature), "top_p": float(top_p), "max_output_tokens": int(max_output_tokens), } response = client.responses.create(**request_params) output = extract_output_text(response) if not output: output = "No output generated." if show_settings: settings = f"""GENERATION SETTINGS ------------------- Model: {model} Temperature: {temperature} Top P: {top_p} Max Output Tokens: {max_output_tokens} Note: Frequency penalty and presence penalty are intentionally not sent in this app to avoid unsupported-parameter errors. OUTPUT ------ """ return settings + output return output except Exception as e: return f"Error:\n{str(e)}" def get_safe_reasoning_effort(model, selected_effort): """ Reasoning effort support differs by model. To avoid errors: - gpt-5-pro only supports high. - gpt-5.1 supports none, low, medium, high. - Most other reasoning models safely use low, medium, high. """ if model == "gpt-5-pro": return "high" if model == "gpt-5.1": allowed = ["none", "low", "medium", "high"] return selected_effort if selected_effort in allowed else "medium" allowed = ["low", "medium", "high"] return selected_effort if selected_effort in allowed else "medium" def run_reasoning_model( prompt, model, reasoning_effort, max_output_tokens, show_settings, ): """ Function for reasoning models only. These models are used for: - Complex analysis - Coding - Multi-step reasoning - Architecture decisions - Trade-off analysis - Agent planning Important: - We pass reasoning.effort here. - We do not pass temperature/top_p here. - We do not pass frequency_penalty or presence_penalty. """ try: client = get_openai_client() safe_effort = get_safe_reasoning_effort(model, reasoning_effort) request_params = { "model": model, "input": prompt, "reasoning": { "effort": safe_effort }, "max_output_tokens": int(max_output_tokens), } response = client.responses.create(**request_params) output = extract_output_text(response) if not output: output = ( "No visible output generated. " "Try increasing Max Output Tokens because reasoning models use " "some tokens internally before producing the final answer." ) if show_settings: settings = f"""REASONING SETTINGS ------------------ Model: {model} Selected Reasoning Effort: {reasoning_effort} Used Reasoning Effort: {safe_effort} Max Output Tokens: {max_output_tokens} Note: Temperature, top_p, frequency penalty, and presence penalty are intentionally not sent for reasoning models to avoid unsupported-parameter errors. OUTPUT ------ """ return settings + output return output except Exception as e: return f"Error:\n{str(e)}" CSS = """ .gradio-container { max-width: 1200px !important; margin: auto !important; } .main-title { text-align: center; margin-bottom: 20px; } .helper-box { padding: 14px; border-radius: 12px; background: #f7f7f8; border: 1px solid #e5e7eb; margin-bottom: 16px; } .output-box textarea { font-family: monospace !important; } """ with gr.Blocks() as demo: gr.Markdown( """