Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

README.md +28 -0
checkpoint.ckpt +3 -0
config.json +117 -0
matformer_config.json +68 -0
modeling_matformer.py +47 -0

README.md ADDED Viewed

	@@ -0,0 +1,28 @@

+---
+tags:
+- matformer
+- custom-model
+library_name: transformers
+---
+# Matformer Model
+Albertina_mini_alibi is a BERT-style masked language model trained using [Matformer](https://github.com/mrinaldi97/matformer).
+## Installation
+```bash
+pip install git+https://github.com/mrinaldi97/matformer.git
+```
+## Usage
+```python
+import torch
+from transformers import AutoModelForMaskedLM
+model = AutoModelForMaskedLM.from_pretrained(
+    "mrinaldi/albertina_mini_alibi_7B_tokens",
+    trust_remote_code=True
+)
+```

checkpoint.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37bc1c20326725b7bd05513aae647f0076ba97a5f5f50e30514d2fb0ae2fbac3
+size 385548491

config.json ADDED Viewed

	@@ -0,0 +1,117 @@

+{
+  "_checkpoint_path": "/home/matteo/Albertone/Matformer/matformer/checkpoints_albertina_gold/Albertina_mini_alibi_v_1_s_0.ckpt",
+  "_matformer_config_dict": {
+    "_checkpoint_path": "/home/matteo/Albertone/Matformer/matformer/checkpoints_albertina_gold/Albertina_mini_alibi_v_1_s_0.ckpt",
+    "_model_class": "BERTModel",
+    "_tokenizer_name": "mrinaldi/Gettone",
+    "attention_type": [],
+    "bias": false,
+    "block_size_for_attention": 128,
+    "bos_token_id": 5,
+    "cloze_probability": 1.0,
+    "compile_flexattn": false,
+    "custom_layers": {},
+    "decoder": null,
+    "default_layer": {
+      "attn_impl": "flash",
+      "ffn_activation": "swiglu",
+      "hooks": {},
+      "normalization": "rmsnorm",
+      "normalization_position": "pre",
+      "positional_encoding": "alibi",
+      "sliding_window_size": null
+    },
+    "encoder": null,
+    "entropy": null,
+    "eos_token_id": 6,
+    "ffn_factor": 3.0,
+    "has_entropy_model": null,
+    "has_text_autoencoder": null,
+    "hidden_size": 768,
+    "hybrid_curriculum": null,
+    "hybrid_equal_final_step": null,
+    "hybrid_mlm_end": null,
+    "hybrid_mlm_prob": null,
+    "hybrid_mlm_start": null,
+    "is_causal": null,
+    "loss_type": "cross_entropy_loss",
+    "mask_token_id": 4,
+    "masked_substitution_rate": 0.2,
+    "max_position_embeddings": 1024,
+    "model_class": null,
+    "name": "Albertina_mini_alibi",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 6,
+    "num_labels": 2,
+    "pad_token_id": 0,
+    "random_probability": 0.0,
+    "rms_norm_eps": 1e-06,
+    "rope_theta": 10000.0,
+    "same_probability": 0.0,
+    "sliding_type": null,
+    "tie_word_embeddings": false,
+    "tokenizer_name": null,
+    "tokenizer_type": null,
+    "training_objective": null,
+    "vocab_size": 32768
+  },
+  "_model_class": "BERTModel",
+  "_tokenizer_name": "mrinaldi/Gettone",
+  "attention_type": [],
+  "auto_map": {
+    "AutoConfig": "modeling_matformer.MatformerConfig",
+    "AutoModel": "modeling_matformer.MatformerModel",
+    "AutoModelForMaskedLM": "modeling_matformer.MatformerForMaskedLM"
+  },
+  "bias": false,
+  "block_size_for_attention": 128,
+  "bos_token_id": 5,
+  "cloze_probability": 1.0,
+  "compile_flexattn": false,
+  "custom_layers": {},
+  "decoder": null,
+  "default_layer": {
+    "attn_impl": "flash",
+    "ffn_activation": "swiglu",
+    "hooks": {},
+    "normalization": "rmsnorm",
+    "normalization_position": "pre",
+    "positional_encoding": "alibi",
+    "sliding_window_size": null
+  },
+  "encoder": null,
+  "entropy": null,
+  "eos_token_id": 6,
+  "ffn_factor": 3.0,
+  "has_entropy_model": null,
+  "has_text_autoencoder": null,
+  "hidden_size": 768,
+  "hybrid_curriculum": null,
+  "hybrid_equal_final_step": null,
+  "hybrid_mlm_end": null,
+  "hybrid_mlm_prob": null,
+  "hybrid_mlm_start": null,
+  "is_causal": null,
+  "loss_type": "cross_entropy_loss",
+  "mask_token_id": 4,
+  "masked_substitution_rate": 0.2,
+  "max_position_embeddings": 1024,
+  "model_class": null,
+  "model_type": "matformer",
+  "name": "Albertina_mini_alibi",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "random_probability": 0.0,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 10000.0,
+  "same_probability": 0.0,
+  "seed": 42,
+  "sliding_type": null,
+  "tokenizer_name": null,
+  "tokenizer_type": null,
+  "training_objective": null,
+  "transformers_version": "4.53.0",
+  "use_cache": true,
+  "vocab_size": 32768
+}

matformer_config.json ADDED Viewed

	@@ -0,0 +1,68 @@

+{
+  "model_class": "BERTModel",
+  "model_config": {
+    "name": "Albertina_mini_alibi",
+    "hidden_size": 768,
+    "ffn_factor": 3.0,
+    "vocab_size": 32768,
+    "bos_token_id": 5,
+    "eos_token_id": 6,
+    "pad_token_id": 0,
+    "mask_token_id": 4,
+    "masked_substitution_rate": 0.2,
+    "cloze_probability": 1.0,
+    "random_probability": 0.0,
+    "same_probability": 0.0,
+    "num_hidden_layers": 6,
+    "num_attention_heads": 12,
+    "tie_word_embeddings": false,
+    "rms_norm_eps": 1e-06,
+    "attention_type": [],
+    "max_position_embeddings": 1024,
+    "block_size_for_attention": 128,
+    "compile_flexattn": false,
+    "bias": false,
+    "default_layer": {
+      "attn_impl": "flash",
+      "sliding_window_size": null,
+      "positional_encoding": "alibi",
+      "normalization": "rmsnorm",
+      "normalization_position": "pre",
+      "ffn_activation": "swiglu",
+      "hooks": {}
+    },
+    "custom_layers": {}
+  },
+  "training": {
+    "optimizer": "muon",
+    "lr_scheduling": true,
+    "lr": 0.0005,
+    "final_lr": 2e-05,
+    "hold_steps": 0.0,
+    "weight_decay": 0.01,
+    "scheduler": "custom",
+    "gradient_clip_val": 1.0,
+    "warmup_steps": 0.05,
+    "max_epochs": 1,
+    "accumulate_grad_batches": 16,
+    "seed": 27,
+    "save_every_n_steps": 100,
+    "checkpoint_name": "Albertina_mini_alibi"
+  },
+  "tokenizer": {
+    "type": "huggingface",
+    "pretrained_name": "mrinaldi/Gettone",
+    "varlen_strategy": "unpadding"
+  },
+  "data": {
+    "data_root": "/mnt/llmdata/data/Albertone_MDAT",
+    "batch_size": 48,
+    "num_workers": 1,
+    "mdat_strategy": "Gettone1024_",
+    "mdat_view": "Albertina7B",
+    "wanted_from_strategy": "chunked_for_recurrence"
+  },
+  "save_dir": "./checkpoints_albertina_gold",
+  "wandb_project": "Albertina_gold",
+  "wandb_run_name": "Albertina_mini_alibi"
+}

modeling_matformer.py ADDED Viewed

	@@ -0,0 +1,47 @@

+# modeling_matformer.py
+import os
+import sys
+matformer_root = os.getenv("MATFORMER_ROOT")
+if matformer_root:
+    matformer_root = os.path.abspath(os.path.expanduser(matformer_root))
+    if matformer_root not in sys.path:
+        sys.path.insert(0, matformer_root)
+try:
+    from matformer.huggingface_integration import (
+        MatformerForCausalLM,
+        MatformerForMaskedLM,
+        MatformerForSequenceClassification,
+        MatformerModel,
+        MatformerConfig,
+        register_matformer
+    )
+    register_matformer()
+except ImportError as e:
+    import subprocess
+    import tempfile
+    print("Installing Matformer from GitHub...")
+    try:
+        subprocess.check_call([
+            sys.executable, "-m", "pip", "install",
+            "git+https://github.com/mrinaldi97/matformer.git"
+        ])
+        from matformer.huggingface_integration import (
+            MatformerForCausalLM,
+            MatformerForMaskedLM,
+            MatformerForSequenceClassification,
+            MatformerModel,
+            MatformerConfig,
+            register_matformer
+        )
+        register_matformer()
+    except Exception as install_error:
+        raise ImportError(
+            "Failed to install Matformer. Install manually:\n"
+            "  pip install git+https://github.com/mrinaldi97/matformer.git\n"
+            "Or set MATFORMER_ROOT environment variable"
+        ) from install_error