#!/usr/bin/env python3
"""
Duplicate layer 5 (GQA Attention) in nemotron-cascade-2 GGUF.
Produces a 53-layer model where layers 5 and 6 are identical attention blocks.
All layers >= 6 in the original are shifted up by 1.

Per-layer metadata arrays (head_count_kv, feed_forward_length) are also updated.
"""

import re
import numpy as np
from pathlib import Path
from gguf import GGUFReader, GGUFWriter, GGUFValueType

# Input GGUF file; it is opened with GGUFReader and never written to.
SRC = Path("/usr/share/ollama/.ollama/models/blobs/sha256-9e0c827cfd6a6d000032be3da3d0914668b0c1112977e927186d29c4487466c4")
# Output GGUF file created by this script.
DST = Path("/home/j/nemotron-cascade-2-attn-repeat-L5.gguf")
# Zero-based index of the block to duplicate: its tensors are written as
# blk.DUP_LAYER and again as blk.(DUP_LAYER + 1).
DUP_LAYER = 5
# Architecture name handed to GGUFWriter; also the prefix of the
# architecture-scoped metadata keys referenced below.
ARCH = "nemotron_h_moe"

# Per-layer array fields that need an extra element inserted
PER_LAYER_ARRAY_FIELDS = {
    f"{ARCH}.attention.head_count_kv",
    f"{ARCH}.feed_forward_length",
}


def read_per_layer_array(field):
    """Return the elements of a per-layer integer array field as Python ints.

    The function relies on this layout of ``field.parts``:
      [0]: key name length
      [1]: key name bytes
      [2]: value type marker (ARRAY)
      [3]: element type code
      [4]: array length
      [5..]: one part per element

    Each part may be a NumPy array (its first element is used), a NumPy
    scalar, or a plain Python number.

    Args:
        field: object exposing a ``parts`` sequence laid out as above.

    Returns:
        A list with ``parts[4]`` entries, each converted with ``int``.

    Raises:
        IndexError: if ``parts`` holds fewer elements than the declared length.
    """
    parts = field.parts

    def first(part):
        # NumPy scalars and 0-d arrays expose __getitem__ but cannot be
        # indexed with [0]; flattening through asarray handles arrays,
        # NumPy scalars and plain numbers uniformly.
        return np.asarray(part).reshape(-1)[0]

    arr_len = int(first(parts[4]))
    # Index explicitly (rather than slicing) so a truncated parts list still
    # raises IndexError instead of silently returning fewer values.
    return [int(first(parts[5 + i])) for i in range(arr_len)]


# Matches per-block tensor names such as "blk.12.attn_q.weight".
_BLK_NAME_RE = re.compile(r'^blk\.(\d+)\.(.*)')


def _tensor_output_names(name, dup_layer):
    """Return the name(s) under which source tensor *name* must be written.

    Tensors outside any block and tensors of blocks below *dup_layer* keep
    their name; tensors of block *dup_layer* are emitted twice (original index
    and index + 1); tensors of later blocks are shifted up by one.
    """
    m = _BLK_NAME_RE.match(name)
    if m is None:
        return [name]
    blk, suffix = int(m.group(1)), m.group(2)
    if blk < dup_layer:
        return [name]
    shifted = f"blk.{blk + 1}.{suffix}"
    return [name, shifted] if blk == dup_layer else [shifted]


def _is_array_field(field):
    """Return True when the reader field holds an array value."""
    return len(field.types) > 1 and field.types[0] == GGUFValueType.ARRAY


def _read_string_array(field):
    """Return every string element of an array-of-strings field, in order.

    ``field.data`` lists the indices of the parts that carry the element
    payloads, so the (length, bytes) pairing can never get out of step.
    """
    values = []
    for idx in field.data:
        raw = field.parts[idx].tobytes()
        try:
            values.append(raw.decode('utf-8'))
        except UnicodeDecodeError:
            # Keep the element (and therefore the array length and ordering)
            # intact by passing the raw bytes through.
            # NOTE(review): assumes GGUFWriter accepts bytes for string
            # values — confirm against the installed gguf-py version.
            values.append(raw)
    return values


def _copy_metadata(reader, writer, per_layer_arrays, new_block_count):
    """Copy key/value metadata from *reader* to *writer*.

    ``general.architecture`` and ``GGUF.*`` pseudo-fields are left to the
    writer, the block count is replaced by *new_block_count*, and fields named
    in *per_layer_arrays* are written with the supplied replacement values.

    Returns:
        The list of field names that could not be copied.
    """
    scalar_writers = {
        GGUFValueType.UINT8: (writer.add_uint8, int),
        GGUFValueType.INT8: (writer.add_int8, int),
        GGUFValueType.UINT16: (writer.add_uint16, int),
        GGUFValueType.INT16: (writer.add_int16, int),
        GGUFValueType.UINT32: (writer.add_uint32, int),
        GGUFValueType.INT32: (writer.add_int32, int),
        GGUFValueType.UINT64: (writer.add_uint64, int),
        GGUFValueType.INT64: (writer.add_int64, int),
        GGUFValueType.FLOAT32: (writer.add_float32, float),
        GGUFValueType.FLOAT64: (writer.add_float64, float),
        GGUFValueType.BOOL: (writer.add_bool, bool),
    }
    # NOTE(review): add_array appears to infer the element type from the
    # Python values, so a UINT32 source array may be re-typed on output —
    # confirm against gguf-py if the exact element type matters.
    array_converters = {
        GGUFValueType.UINT32: int,
        GGUFValueType.INT32: int,
        GGUFValueType.FLOAT32: float,
    }
    block_count_key = f"{ARCH}.block_count"
    not_copied = []

    for field_name, field in reader.fields.items():
        # Skip reader pseudo-fields and the architecture (writer adds it).
        if field_name.startswith("GGUF.") or field_name == "general.architecture":
            continue

        if field_name == block_count_key:
            print(f"  block_count: {new_block_count - 1} -> {new_block_count}")
            writer.add_uint32(block_count_key, new_block_count)
            continue

        if field_name in per_layer_arrays:
            writer.add_array(field_name, per_layer_arrays[field_name])
            continue

        if _is_array_field(field):
            elem_type = field.types[1]
            if elem_type == GGUFValueType.STRING:
                values = _read_string_array(field)
            elif elem_type in array_converters:
                convert = array_converters[elem_type]
                values = [convert(field.parts[idx][0]) for idx in field.data]
            else:
                print(f"  SKIP array: {field_name} (elem type {elem_type})")
                not_copied.append(field_name)
                continue
            if values:
                writer.add_array(field_name, values)
            else:
                # An empty list gives the writer nothing to infer a type from.
                print(f"  SKIP array: {field_name} (empty)")
                not_copied.append(field_name)
            continue

        # Scalar types
        field_type = field.types[-1] if field.types else None
        try:
            if field_type == GGUFValueType.STRING:
                writer.add_string(field_name, field.parts[-1].tobytes().decode('utf-8'))
            elif field_type in scalar_writers:
                add_value, convert = scalar_writers[field_type]
                add_value(field_name, convert(field.parts[-1][0]))
            else:
                print(f"  SKIP: {field_name} (type {field_type})")
                not_copied.append(field_name)
        except Exception as e:
            # Best effort: report the key and keep copying the rest.
            print(f"  ERROR on {field_name}: {e}")
            not_copied.append(field_name)

    return not_copied


def main():
    """Write DST as a copy of SRC in which block DUP_LAYER appears twice.

    Steps: build the output tensor list (duplicating DUP_LAYER and shifting
    later blocks up by one), insert a duplicate entry into each per-layer
    metadata array listed in PER_LAYER_ARRAY_FIELDS, copy the remaining
    metadata with the block count incremented, then write everything to DST.

    Raises:
        ValueError: if SRC has no ``{ARCH}.block_count`` key, contains no
            tensors for ``blk.DUP_LAYER``, or a per-layer array is too short
            to have an entry for DUP_LAYER.
    """
    print(f"Reading {SRC} ...")
    reader = GGUFReader(str(SRC))

    # Derive layer counts from the file instead of assuming a fixed size.
    block_count_key = f"{ARCH}.block_count"
    block_count_field = reader.fields.get(block_count_key)
    if block_count_field is None:
        raise ValueError(f"{block_count_key} not found in {SRC}; is ARCH={ARCH!r} correct?")
    orig_blocks = int(block_count_field.parts[-1][0])
    new_blocks = orig_blocks + 1

    # --- Build tensor list with duplication ---
    tensors_to_write = []
    duplicated = 0
    for t in reader.tensors:
        names = _tensor_output_names(t.name, DUP_LAYER)
        duplicated += len(names) - 1
        tensors_to_write.extend((name, t) for name in names)
    if not duplicated:
        # Without this check the output would have a hole at DUP_LAYER + 1.
        raise ValueError(f"No tensors found for blk.{DUP_LAYER} in {SRC}")

    print(f"Original: {len(reader.tensors)} tensors, {orig_blocks} layers")
    print(f"New: {len(tensors_to_write)} tensors, {new_blocks} layers")

    # --- Read per-layer arrays and insert duplicate ---
    per_layer_arrays = {}
    for field_name in sorted(PER_LAYER_ARRAY_FIELDS):  # sorted: stable log order
        field = reader.fields.get(field_name)
        if field is None or not _is_array_field(field):
            continue
        orig = read_per_layer_array(field)
        if DUP_LAYER >= len(orig):
            raise ValueError(
                f"{field_name} has {len(orig)} elements; no entry for layer {DUP_LAYER}"
            )
        # Insert duplicate of DUP_LAYER at DUP_LAYER+1
        new_arr = list(orig)
        new_arr.insert(DUP_LAYER + 1, orig[DUP_LAYER])
        per_layer_arrays[field_name] = new_arr
        print(f"  {field_name}: {len(orig)} -> {len(new_arr)} elements")
        # Show the non-zero entries for verification
        nonzero = [(i, v) for i, v in enumerate(new_arr) if v != 0]
        print(f"    non-zero positions: {nonzero}")

    # --- Write new GGUF ---
    print(f"Writing {DST} ...")
    writer = GGUFWriter(str(DST), ARCH)
    try:
        not_copied = _copy_metadata(reader, writer, per_layer_arrays, new_blocks)

        # --- Add tensors ---
        for new_name, tensor in tensors_to_write:
            writer.add_tensor(new_name, tensor.data, raw_dtype=tensor.tensor_type)

        print("Finalizing ...")
        writer.write_header_to_file()
        writer.write_kv_data_to_file()
        writer.write_tensors_to_file()
    finally:
        # Release the output handle even when copying or writing fails.
        writer.close()

    if not_copied:
        print(f"WARNING: {len(not_copied)} metadata key(s) not copied: {', '.join(not_copied)}")

    size_gb = DST.stat().st_size / (1024**3)
    print(f"Done! {DST} ({size_gb:.1f} GB)")


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()