import gradio as gr
import json
import os
from loguru import logger

class AudioInstructTab:
    def __init__(self, speech_service, prompt_audio_path_example="audio/00000309-00000300.wav"):
        """Store the speech backend and the reference clip used by the examples.

        Args:
            speech_service: Backend object. This class calls its
                ``submit_instruct_task(payload)`` and
                ``poll_instruct_task(task_id)`` methods.
            prompt_audio_path_example: Path of the reference audio that the
                example rows pre-fill. Defaults to the path that was
                previously hard-coded, so existing callers are unaffected;
                pass another path when the clip lives elsewhere.
        """
        self.service = speech_service
        self.prompt_audio_path_example = prompt_audio_path_example

    def create_tab(self):
        """Build the "可控 TTS (Audio Instruct)" tab and bind its event handlers.

        The tab contains three sub-tabs, each with its own generate button,
        output player, task-id state and hidden polling counter:

        * structured mode - one instruction type at a time, with a control
          group that is shown/hidden by ``update_ui_visibility``;
        * free-form ("expert") mode - every parameter is available at once;
        * JSON mode - the raw request is typed as JSON.

        Clicking a generate button submits a task; the handler sets the hidden
        polling counter to 1, and the counter's ``change`` event drives
        ``check_task_status`` until the handler resets the counter to 0.

        NOTE(review): the method opens ``gr.TabItem`` directly, so it presumably
        has to be called inside an enclosing ``gr.Tabs``/``gr.Blocks`` context -
        confirm against the caller.
        """
        with gr.TabItem("可控 TTS (Audio Instruct)"):
            gr.Markdown("## 可控语音合成演示")

            with gr.Tabs() as sub_tabs:
                # --- Tab 1: structured mode ---
                with gr.TabItem("结构化模式"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            instruct_type = gr.Radio(
                                [
                                    ("基础 (basic)", "basic"),
                                    ("方言 (dialect)", "dialect"),
                                    ("情感 (emotion)", "emotion"),
                                    ("IP (IP)", "IP"),
                                    ("风格 (style)", "style")
                                ],
                                label="指令类型",
                                value="emotion"
                            )
                            text_input = gr.Textbox(label="输入文本")
                            prompt_audio = gr.Audio(type="filepath", label="参考音频")
                            speaker_id = gr.Textbox(label="说话人ID", value="speaker_1")

                            # Control groups toggled by the instruction type; only the
                            # group matching the default type ("emotion") starts visible.
                            with gr.Group(visible=False) as basic_controls:
                                pitch_radio = gr.Radio(["低", "中", "高"], label="基频", value="中")
                                volume_radio = gr.Radio(["低", "中", "高"], label="音量", value="中")
                                speed_radio = gr.Radio(["慢速", "中速", "快速"], label="语速", value="中速")

                            with gr.Group(visible=False) as dialect_controls:
                                dialect_input = gr.Textbox(label="方言")
                            with gr.Group(visible=True) as emotion_controls:
                                emotion_input = gr.Textbox(label="情感")
                            with gr.Group(visible=False) as ip_controls:
                                ip_character_input = gr.Textbox(label="IP角色")
                                album_input = gr.Textbox(label="所属剧名", placeholder="可选")
                            with gr.Group(visible=False) as style_controls:
                                style_input = gr.Textbox(label="风格")

                            seed = gr.Number(value=1234, label="随机种子", precision=0)

                            # Order matters: it must match both the parameter order of
                            # submit_structured_task and the example columns below.
                            structured_param_inputs = [
                                speaker_id, pitch_radio, volume_radio, speed_radio,
                                dialect_input, emotion_input, ip_character_input,
                                style_input, album_input
                            ]
                            structured_inputs = (
                                [instruct_type, text_input, prompt_audio]
                                + structured_param_inputs
                                + [seed]
                            )

                            # Example rows. Column order (same as structured_inputs):
                            #   instruct_type, text, prompt_audio, speaker_id, pitch,
                            #   volume, speed, dialect, emotion, ip_character, style,
                            #   album, seed
                            # Gradio pitfall: a column whose cells are all None is
                            # dropped from the inputs, which breaks the binding; the
                            # last row exists only to keep every column populated.
                            ref_audio = self.prompt_audio_path_example
                            examples_data = [
                                ["basic", "这是一个高音调、高音量的快速语音示例。", ref_audio, "speaker_1", "高", "高", "快速", None, None, None, None, None, 1234],
                                ["basic", "这是一个低音调、低音量的慢速语音示例。", ref_audio, "speaker_1", "低", "低", "慢速", None, None, None, None, None, 5678],
                                ["dialect", "其实好多广州小学幼稚园都系噉样", ref_audio, "speaker_1", "中", "中", "中速", "广粤话", None, None, None, None, 1234],
                                ["dialect", "那你们有啥子喜欢看的电视剧吗？", ref_audio, "speaker_1", "中", "中", "中速", "川渝话", None, None, None, None, 5678],
                                ["emotion", "我今天非常开心，阳光明媚！", ref_audio, "speaker_1", "中", "中", "中速", None, "高兴", None, None, None, 1234],
                                ["emotion", "这个消息太令人难过了。", ref_audio, "speaker_1", "中", "中", "中速", None, "悲伤", None, None, None, 5678],
                                # IP rows: the drama title belongs in the *album* column
                                # (after style), not in the style column.
                                ["IP", "四个兄弟互相一商量，说道，我们的机会来了，让我们各展所能吧。", None, "speaker_1", "中", "中", "中速", None, None, "四郎", None, "甄嬛传", 1234],
                                ["IP", "也只有到村也只有到过村口的小动物，才知道村口有一家大熊拉面馆。", None, "speaker_1", "中", "中", "中速", None, None, "野原新之助 (小新)", None, "蜡笔小新", 5678],
                                ["style", "号召更多渴望突破自我的年轻力量，加入到敢于突破破界的队伍中来", None, "speaker_1", "中", "中", "中速", None, None, None, "一位女性以柔和、缓慢且富有情感的方式讲述一个深刻而悲伤的故事，营造出沉思和略带忧郁的氛围。", None, 1234],
                                ["style", "你现在马上把全套再复印一份，给我的司机武大", None, "speaker_1", "中", "中", "中速", None, None, None, "一位年幼男孩用缓慢清晰但略显模糊的语调，富有表现力地讲述故事，语调带有唱歌般的韵律。", None, 5678],
                                ["basic", "", ref_audio, "speaker_1", "中", "中", "中速", '', '', '', '', '', 0],  # filler row for otherwise-empty columns
                            ]

                            # Keep a reference so events can be chained on it later.
                            # The examples only pre-fill the inputs (no fn/outputs are
                            # given), so caching is explicitly disabled: Gradio requires
                            # fn and outputs whenever example caching is turned on.
                            self.examples_component = gr.Examples(
                                examples=examples_data,
                                inputs=structured_inputs,
                                label="点击示例以填充输入",
                                cache_examples=False
                            )

                            generate_btn = gr.Button("生成音频", variant="primary")

                        with gr.Column(scale=1):
                            audio_output = gr.Audio(label="合成结果", interactive=False)
                            # Task state and the hidden counter that drives polling.
                            task_id_state = gr.State(None)
                            polling_counter = gr.Number(value=0, visible=False)
                            status_msg = gr.Markdown("")

                # --- Tab 2: free-form ("expert") mode ---
                with gr.TabItem("自由输入模式"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            gr.Markdown("在此模式下, 您可以组合所有参数进行合成。")
                            expert_text = gr.Textbox(label="输入文本")
                            expert_prompt_audio = gr.Audio(type="filepath", label="参考音频 (IP/风格模式下可选)")
                            expert_speaker_id = gr.Textbox(label="说话人ID", value="speaker_1")
                            expert_pitch = gr.Radio(["低", "中", "高"], label="基频", value="中")
                            expert_volume = gr.Radio(["低", "中", "高"], label="音量", value="中")
                            expert_speed = gr.Radio(["慢速", "中速", "快速"], label="语速", value="中速")
                            expert_dialect = gr.Textbox(label="方言")
                            expert_emotion = gr.Textbox(label="情感")
                            expert_ip_character = gr.Textbox(label="IP角色")
                            expert_album = gr.Textbox(label="所属剧名", placeholder="可选")
                            expert_style = gr.Textbox(label="风格")
                            expert_seed = gr.Number(value=1234, label="随机种子", precision=0)
                            expert_generate_btn = gr.Button("生成音频", variant="primary")
                        with gr.Column(scale=1):
                            expert_audio_output = gr.Audio(label="合成结果", interactive=False)
                            expert_task_id_state = gr.State(None)
                            expert_polling_counter = gr.Number(value=0, visible=False)
                            expert_status_msg = gr.Markdown("")

                # --- Tab 3: JSON input mode ---
                with gr.TabItem("JSON 输入模式"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            json_input = gr.Textbox(lines=15, label="JSON 输入", placeholder='请在此输入JSON...')
                            free_prompt_audio = gr.Audio(type="filepath", label="参考音频")
                            free_seed = gr.Number(value=1234, label="随机种子", precision=0)
                            free_generate_btn = gr.Button("生成音频", variant="primary")
                        with gr.Column(scale=1):
                            free_audio_output = gr.Audio(label="合成结果", interactive=False)
                            free_task_id_state = gr.State(None)
                            free_polling_counter = gr.Number(value=0, visible=False)
                            free_status_msg = gr.Markdown("")

            # --- Event binding ---

            logger.info("Binding events for AudioInstructTab")

            # 1. Show only the control group that matches the instruction type.
            instruct_type.change(
                fn=self.update_ui_visibility,
                inputs=instruct_type,
                outputs=[basic_controls, dialect_controls, emotion_controls, ip_controls, style_controls, prompt_audio]
            )

            # NOTE: chaining update_ui_visibility onto
            # self.examples_component.load_input_event is currently disabled;
            # visibility changes rely on instruct_type.change above.

            logger.info("Examples component event bound.")

            # 2. Structured mode: submit, then poll via the hidden counter.
            generate_btn.click(
                fn=self.submit_structured_task,
                inputs=structured_inputs,
                outputs=[task_id_state, polling_counter, status_msg, audio_output]
            )
            polling_counter.change(
                fn=self.check_task_status,
                inputs=[task_id_state, polling_counter],
                outputs=[audio_output, polling_counter, status_msg],
                every=2
            )

            logger.info("Structured mode events bound.")

            # 3. Free-form / expert mode. Same parameter order as the structured list.
            expert_param_inputs = [
                expert_speaker_id, expert_pitch, expert_volume, expert_speed,
                expert_dialect, expert_emotion, expert_ip_character,
                expert_style, expert_album
            ]
            expert_generate_btn.click(
                fn=self.submit_expert_task,
                inputs=[expert_text, expert_prompt_audio] + expert_param_inputs + [expert_seed],
                outputs=[expert_task_id_state, expert_polling_counter, expert_status_msg, expert_audio_output]
            )
            expert_polling_counter.change(
                fn=self.check_task_status,
                inputs=[expert_task_id_state, expert_polling_counter],
                outputs=[expert_audio_output, expert_polling_counter, expert_status_msg],
                every=2
            )

            logger.info("Expert mode events bound.")

            # 4. JSON mode.
            free_generate_btn.click(
                fn=self.submit_json_task,
                inputs=[json_input, free_prompt_audio, free_seed],
                outputs=[free_task_id_state, free_polling_counter, free_status_msg, free_audio_output]
            )
            free_polling_counter.change(
                fn=self.check_task_status,
                inputs=[free_task_id_state, free_polling_counter],
                outputs=[free_audio_output, free_polling_counter, free_status_msg],
                every=2
            )

    def update_ui_visibility(self, instruct_type):
        """Compute component updates for the selected instruction type.

        Args:
            instruct_type: The Radio's selected value ("basic", "dialect",
                "emotion", "IP" or "style"); Gradio passes the value part of
                the (label, value) tuples.

        Returns:
            A 6-tuple of ``gr.update`` objects, in the order expected by the
            bound outputs: basic, dialect, emotion, IP and style control
            groups (visibility), followed by the prompt-audio component
            (label only).
        """
        logger.info(f"Updating UI visibility for instruct_type: {instruct_type}")

        # The reference audio is labelled as optional for the IP/style modes.
        audio_label = (
            "参考音频 (在此模式下可选/不起作用)"
            if instruct_type in ("IP", "style")
            else "参考音频 (Prompt Audio)"
        )

        # One entry per control group, in output order.
        group_kinds = ("basic", "dialect", "emotion", "IP", "style")
        group_updates = [gr.update(visible=(instruct_type == kind)) for kind in group_kinds]

        return (*group_updates, gr.update(label=audio_label))

    def _construct_caption(self, instruct_type, speaker_id, pitch, volume, speed,
                         dialect, emotion, ip_character, style, album):
        """Build the caption dictionary sent to the backend.

        Args:
            instruct_type: "expert" to emit every field at once, or one of
                "basic", "dialect", "emotion", "IP", "style" to emit only the
                field(s) belonging to that mode. Any other value yields a
                caption with just the index and speaker.
            speaker_id: Speaker name; falls back to 'speaker_1' when empty.
            pitch, volume, speed: Prosody settings as chosen in the UI radios.
            dialect, emotion, style: Free-text attributes.
            ip_character: Character name for the IP attribute.
            album: Optional title; when both it and ``ip_character`` are set
                the IP attribute becomes ``"<album>_<ip_character>"``.

        Returns:
            ``{"audio_sequence": [entry]}`` where ``entry`` is a single dict
            keyed by the Chinese attribute names. In expert mode every key is
            present and empty or neutral values are ``None``; the neutral
            value is "中" for pitch/volume and "中速" for speed (the speed
            radio offers 慢速/中速/快速, so "中" is accepted only for
            backward compatibility).
        """
        logger.info(f"Constructing caption for instruct_type: {instruct_type}")

        speaker = speaker_id or 'speaker_1'
        ip_label = f"{album}_{ip_character}" if album and ip_character else ip_character

        if instruct_type == "expert":
            def unless_neutral(value, neutral_values):
                """Return *value*, or None when it is empty or a neutral setting."""
                return value if value and value not in neutral_values else None

            return {
                "audio_sequence": [{
                    '序号': 1,
                    '说话人': speaker,
                    '方言': dialect or None,
                    '风格': style or None,
                    '语速': unless_neutral(speed, ("中", "中速")),
                    '基频': unless_neutral(pitch, ("中",)),
                    '音量': unless_neutral(volume, ("中",)),
                    '情感': emotion or None,
                    # Empty IP is normalised to None like every other field.
                    '影视IP': ip_label or None,
                }]
            }

        # Structured mode: only the attribute(s) of the selected type are added.
        caption = {'序号': 1, '说话人': speaker}
        if instruct_type == "basic":
            caption.update({"基频": pitch, "音量": volume, "语速": speed})
        elif instruct_type == "dialect":
            caption["方言"] = dialect
        elif instruct_type == "emotion":
            caption["情感"] = emotion
        elif instruct_type == "IP":
            caption["影视IP"] = ip_label
        elif instruct_type == "style":
            caption["风格"] = style

        return {"audio_sequence": [caption]}

    def _submit_task(self, payload):
        """Log the payload and hand it to the speech service.

        Args:
            payload: Request dict; the callers in this class build it with the
                keys ``text``, ``prompt_audio``, ``caption`` and ``seed``.

        Returns:
            Whatever ``self.service.submit_instruct_task(payload)`` returns.
        """
        logger.info(f"AudioInstructTab submitting task with payload: {payload}")

        submit = self.service.submit_instruct_task
        submission_result = submit(payload)
        return submission_result

    def _check_task(self, task_id):
        """Log the lookup and ask the speech service for the task's state.

        Args:
            task_id: Identifier previously obtained from ``_submit_task``.

        Returns:
            Whatever ``self.service.poll_instruct_task(task_id)`` returns;
            ``check_task_status`` unpacks it as ``(status, result)``.
        """
        logger.info(f"AudioInstructTab checking task status for task_id: {task_id}")

        poll = self.service.poll_instruct_task
        poll_result = poll(task_id)
        return poll_result

    def submit_structured_task(self, instruct_type, text, prompt_audio,
                             speaker_id, pitch, volume, speed, dialect, emotion,
                             ip_character, style, album, seed):
        """提交结构化任务"""
        logger.info(f"Submitting structured task: type={instruct_type}, text={text}")
        if not text:
            return None, 0, "错误: 请输入文本", None

        # 校验: 非 IP/Style 模式必须提供参考音频
        if not prompt_audio and instruct_type not in ["IP", "style"]:
            return None, 0, "错误: 此模式需要上传参考音频以提取音色", None

        caption = self._construct_caption(instruct_type, speaker_id, pitch, volume, speed,
                                        dialect, emotion, ip_character, style, album)

        payload = {
            "text": text,
            "prompt_audio": prompt_audio,
            "caption": json.dumps(caption, ensure_ascii=False),
            "seed": seed,
        }

        # 调用内部提交方法
        task_id = self._submit_task(payload)

        if task_id.startswith("错误"):
            return None, 0, task_id, None

        return task_id, 1, f"任务已提交 (ID: ...{task_id[-6:]})", None

    def submit_expert_task(self, text, prompt_audio,
                         speaker_id, pitch, volume, speed, dialect, emotion,
                         ip_character, style, album, seed):
        """提交专家模式任务"""

        logger.info(f"Submitting expert task with text: {text}")

        return self.submit_structured_task("expert", text, prompt_audio,
                                         speaker_id, pitch, volume, speed, dialect, emotion,
                                         ip_character, style, album, seed)

    def submit_json_task(self, json_str, prompt_audio, seed):
        """提交 JSON 模式任务"""

        logger.info(f"Submitting JSON task with input: {json_str}")

        if not json_str or not json_str.strip():
            return None, 0, "错误: 请输入 JSON", None

        try:
            data = json.loads(json_str)

            # 参考 inbox/004-in.py 的逻辑处理 caption
            if "caption" in data and isinstance(data["caption"], str):
                caption_str = data["caption"]
                corrected_caption_str = caption_str.replace('"null"', 'null')
                data["caption"] = json.loads(corrected_caption_str)

            text = data.get("text")
            caption_dict = data.get("caption")

            if text is None or caption_dict is None:
                 return None, 0, "错误: JSON 中必须包含 'text' 和 'caption' 字段", None

            # 检查是否包含 IP 或 风格，如果包含则无需 prompt_audio
            is_ip_or_style = False
            if isinstance(caption_dict, dict) and "audio_sequence" in caption_dict and caption_dict["audio_sequence"]:
                first_item = caption_dict["audio_sequence"][0]
                # 注意：key 可能是中文 "影视IP"/"风格"
                if first_item.get("影视IP") or first_item.get("风格"):
                    is_ip_or_style = True

            if not prompt_audio and not is_ip_or_style:
                return None, 0, "错误: 此模式需要上传参考音频 (除非指定了影视IP or 风格)", None

        except (json.JSONDecodeError, TypeError) as e:
            return None, 0, f"错误: JSON 格式无效或处理失败: {e}", None

        payload = {
            "text": text,
            "prompt_audio": prompt_audio,
            "caption": json.dumps(caption_dict, ensure_ascii=False),
            "seed": seed,
        }

        task_id = self._submit_task(payload)

        if task_id.startswith("错误"):
            return None, 0, task_id, None

        return task_id, 1, f"任务已提交 (ID: ...{task_id[-6:]})", None

    def check_task_status(self, task_id, polling_counter):
        """Poll the backend once and translate the result into UI updates.

        Args:
            task_id: Id stored by a submit handler, or None.
            polling_counter: Current value of the hidden counter; 0 means
                polling is stopped.

        Returns:
            A 3-tuple ``(audio_update, next_counter, status)`` matching the
            bound outputs. The counter is incremented while the task is
            pending (which re-triggers the counter's change event) and reset
            to 0 once the task finishes or fails.
        """
        # Nothing to poll: no task yet, or the counter was reset to zero.
        polling_stopped = polling_counter == 0
        if not task_id or polling_stopped:
            return gr.update(), 0, gr.update()

        logger.info(f"Checking task status for task_id: {task_id}, polling_counter: {polling_counter}")

        status, result = self._check_task(task_id)

        if status in ("done", "completed"):
            return gr.update(value=result), 0, "合成成功！"

        if status != "pending":
            return gr.update(), 0, f"失败: {status}"

        # Still running: the event is scheduled every 2 seconds, so the counter
        # doubles as a rough elapsed-time estimate.
        elapsed = polling_counter * 2
        return gr.update(), polling_counter + 1, f"合成中... ({elapsed}s)"