"""Gemma3 Pi0.6 (270m VLM) configuration"""

from transformers import Gemma3Config


class Gemma3Pi06Config(Gemma3Config):
    """
    Configuration for Gemma3 Pi0.6 - a VLM with 270m language model.

    This config combines:
    - Vision tower from google/gemma-3-4b-pt (SigLIP)
    - Multi-modal projector (reinitialize for dimension compatibility)
    - Language model from google/gemma-3-270m

    Every instantiation loads both base configs through
    ``AutoConfig.from_pretrained``, so both identifiers must be resolvable
    at construction time.

    Args:
        vlm_base_model: Model ID or path of the multimodal base config. Its
            values initialize this config (including ``vision_config``) and
            its ``text_config.vocab_size`` is kept.
        llm_base_model: Model ID or path of the language-model base config.
            Its values are copied onto ``text_config``, except for the
            identity/bookkeeping keys and ``vocab_size``.
        **kwargs: Attribute overrides applied last. A dict given for an
            attribute that currently holds a nested config (e.g.
            ``text_config`` or ``vision_config``) is rebuilt into that
            config's class instead of being stored as a raw dict.
    """

    model_type = "gemma3"  # Keep gemma3 for LLaMAFactory compatibility

    def __init__(
        self,
        vlm_base_model: str = "google/gemma-3-4b-pt",
        llm_base_model: str = "google/gemma-3-270m",
        **kwargs,
    ):
        # Store base model IDs for reference
        self.vlm_base_model = vlm_base_model
        self.llm_base_model = llm_base_model

        # Load base configs
        from transformers import AutoConfig

        # NOTE(review): trust_remote_code=True lets the referenced repos run
        # their own config code at load time. Presumably unnecessary for the
        # default Gemma 3 checkpoints - confirm before tightening.
        vlm_config = AutoConfig.from_pretrained(vlm_base_model, trust_remote_code=True)
        llm_config = AutoConfig.from_pretrained(llm_base_model, trust_remote_code=True)

        # Initialize with VLM config (keeps vision_config)
        super().__init__(**vlm_config.to_dict())

        # Override text_config with LLM config. Identity/bookkeeping keys and
        # vocab_size are deliberately not copied from the LLM.
        skipped_llm_keys = frozenset(
            {
                '_name_or_path',
                'transformers_version',
                'model_type',
                'architectures',
                'vocab_size',
            }
        )
        for key, value in llm_config.to_dict().items():
            if key not in skipped_llm_keys:
                setattr(self.text_config, key, value)

        # Keep VLM vocab size for image tokens
        # VLM: 262208 (262144 base + 64 image tokens)
        # LLM: 262144 (base only)
        # We need the extended vocab for multimodal functionality
        self.text_config.vocab_size = vlm_config.text_config.vocab_size

        # Apply any user overrides
        for key, value in kwargs.items():
            # When this config is rebuilt from its serialized form, nested
            # configs come back as plain dicts. Assigning such a dict directly
            # would replace the config object and break attribute access like
            # `self.text_config.hidden_size`, so rebuild it with the class of
            # the nested config currently in place.
            existing = getattr(self, key, None)
            if isinstance(value, dict) and hasattr(existing, "to_dict"):
                value = type(existing)(**value)
            setattr(self, key, value)