"""
fine_tune.py — Fine-tune Liquid AI's LFM2.5-1.2B model for spam classification using LoRA.

This script uses Liquid AI's officially recommended fine-tuning approach:
  - TRL's SFTTrainer for supervised fine-tuning
  - PEFT's LoRA for memory-efficient adapter training

It does three things:
  1. Trains LoRA adapters on our spam/ham training data
  2. Evaluates the trained model on the test set
  3. Runs a quick generation test to verify the adapter works

Prerequisites:
  - Training data in ../shared_training_data/train.jsonl (shared with the MLX project)
  - Internet connection to download the model (first run only)

Usage:
  python3 fine_tune.py              # train, evaluate, then run the generation test
  python3 fine_tune.py --eval-only  # skip training and reuse the saved adapter

Hardware:
  Designed for an Apple Silicon Mac (M1/M2/M3/M4). Targets ~8-12 GB unified memory.
  When the MPS backend is unavailable the script falls back to CPU (much slower).
"""

import json
import math
import os
import sys

import torch
from datasets import Dataset
from peft import LoraConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTConfig, SFTTrainer

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# The base model from Liquid AI (downloaded automatically from HuggingFace)
MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"

# Where to cache the downloaded model locally
MODEL_CACHE_DIR = "models"

# Path to the shared training data (shared with spam-classifier-mlx).
# NOTE: relative paths are resolved against the current working directory.
DATA_DIR = "../shared_training_data"
TRAIN_FILE = os.path.join(DATA_DIR, "train.jsonl")
TEST_FILE = os.path.join(DATA_DIR, "test.jsonl")

# Where the trained LoRA adapter weights will be saved
ADAPTER_DIR = "adapters"

# LoRA hyperparameters — from Liquid AI's official cookbook
# (Source: https://github.com/Liquid4All/cookbook/blob/main/finetuning/notebooks/sft_with_trl.ipynb)
LORA_RANK = 8         # Rank of the low-rank update matrices (higher = more trainable parameters)
LORA_ALPHA = 16       # Scaling factor (2x the rank)
LORA_DROPOUT = 0.1    # Dropout to prevent overfitting
LORA_TARGET_MODULES = [  # Which layers inside the model get LoRA adapters
    # Attention layers (Multi-Head Attention)
    "q_proj",    # Query projection
    "k_proj",    # Key projection
    "v_proj",    # Value projection
    "out_proj",  # Output projection (Liquid AI uses out_proj, not o_proj)
    # Feed-forward layers (GLU — Gated Linear Unit)
    "w1",        # First feed-forward weight
    "w2",        # Second feed-forward weight
    "w3",        # Gate weight
    # Conv layers (unique to Liquid AI's architecture)
    "in_proj",   # Input projection (conv block)
]

# Training hyperparameters
NUM_EPOCHS = 3                   # How many times to loop through all training data
BATCH_SIZE = 1                   # Smaller batch = much less memory per step
GRADIENT_ACCUMULATION_STEPS = 4  # Accumulate 4 steps to get an effective batch size of 4
LEARNING_RATE = 2e-4             # How fast the model learns (Liquid AI's SFT recommendation)
MAX_LENGTH = 256                 # Spam emails rarely need 512 tokens — 256 covers most cases
LOGGING_STEPS = 10               # Print training loss every 10 steps


# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------

def _select_device():
    """Return "mps" when the Apple Silicon GPU backend is available, else "cpu"."""
    return "mps" if torch.backends.mps.is_available() else "cpu"


def _load_base_model_and_tokenizer():
    """Load the base model and its tokenizer, caching downloads in MODEL_CACHE_DIR.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer is guaranteed to have a
        padding token (the EOS token is reused when none is defined).
    """
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        cache_dir=MODEL_CACHE_DIR,    # Save to local models/ directory
        device_map=_select_device(),  # Pin everything to one device (MPS GPU when available)
        torch_dtype=torch.bfloat16,   # Load in bfloat16 from the start to cut memory in half
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, cache_dir=MODEL_CACHE_DIR)

    # Make sure the tokenizer has a padding token (needed for batched training)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer


# ---------------------------------------------------------------------------
# 1. Check prerequisites
# ---------------------------------------------------------------------------

def check_prerequisites():
    """Make sure the training data exists before we start.

    Exits the process with status 1 when the training file is missing. A
    missing test file or a missing MPS backend only produces a warning.
    """
    print("=" * 60)
    print("STEP 1: Checking prerequisites")
    print("=" * 60)

    # Check training data
    if not os.path.isfile(TRAIN_FILE):
        print(f" ERROR: Training data not found: {TRAIN_FILE}")
        print(" Training data lives in the shared folder:")
        print(f" {os.path.abspath(DATA_DIR)}")
        sys.exit(1)
    print(f" Training data: {TRAIN_FILE} ... OK")

    # Check test data
    if not os.path.isfile(TEST_FILE):
        print(f" WARNING: Test data not found: {TEST_FILE}")
        print(" Evaluation step will be skipped.")
    else:
        print(f" Test data: {TEST_FILE} ... OK")

    # Check for Apple Silicon
    if torch.backends.mps.is_available():
        print(" Apple Silicon: MPS backend available ... OK")
    else:
        print(" WARNING: MPS not available. Training will use CPU (much slower).")

    print()


# ---------------------------------------------------------------------------
# 2. Load training data
# ---------------------------------------------------------------------------

def load_jsonl_as_dataset(file_path):
    """Load a JSONL file into a HuggingFace Dataset.

    Each line in the JSONL file looks like:
        {"messages": [{"role": "system", ...}, {"role": "user", ...}, {"role": "assistant", ...}]}

    SFTTrainer expects a Dataset with a "messages" column in this exact format.

    Args:
        file_path: Path to a UTF-8 encoded JSONL file.

    Returns:
        A ``datasets.Dataset`` with one row per non-blank line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message includes the file path and line number.
    """
    examples = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not records.
                continue
            try:
                examples.append(json.loads(line))
            except json.JSONDecodeError as err:
                # Re-raise the same exception type, adding where it happened.
                raise json.JSONDecodeError(
                    f"{file_path}:{line_number}: {err.msg}", err.doc, err.pos
                ) from err
    return Dataset.from_list(examples)


# ---------------------------------------------------------------------------
# 3. Run LoRA fine-tuning
# ---------------------------------------------------------------------------

def run_training():
    """
    Fine-tune the Liquid AI model using TRL's SFTTrainer with LoRA.

    This is Liquid AI's officially recommended fine-tuning approach.
    SFTTrainer handles:
      - Applying the chat template to format messages correctly
      - Tokenizing the text into numbers the model understands
      - Masking the prompt so loss is only computed on the assistant's response
      - Running the training loop (forward pass, loss, backward pass, optimizer)

    Source: https://docs.liquid.ai/customization/finetuning-frameworks/trl

    Returns:
        A ``(model, tokenizer)`` tuple for the evaluation and generation steps.
        The adapter and tokenizer are also saved to ADAPTER_DIR.
    """
    print("=" * 60)
    print("STEP 2: LoRA fine-tuning with TRL SFTTrainer")
    print("=" * 60)

    # --- Load the base model ---
    print(f" Loading model: {MODEL_ID}")
    print(" (First run downloads ~2.4 GB from HuggingFace — be patient)")
    print()

    # bf16 mixed precision is only requested when training on the MPS backend.
    # NOTE: this checks MPS availability, not bf16 support itself; older macOS
    # versions may expose MPS without bf16.
    use_bf16 = torch.backends.mps.is_available()

    model, tokenizer = _load_base_model_and_tokenizer()

    print(" Model loaded successfully!")
    print()

    # --- Configure LoRA ---
    # LoRA adds small trainable adapter layers to the modules listed in
    # LORA_TARGET_MODULES. Only these adapters are trained — the original
    # model weights stay frozen.
    print(" Configuring LoRA adapters...")
    print(f" Rank: {LORA_RANK}")
    print(f" Alpha: {LORA_ALPHA}")
    print(f" Target modules: {LORA_TARGET_MODULES}")
    print(f" Dropout: {LORA_DROPOUT}")

    peft_config = LoraConfig(
        r=LORA_RANK,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        target_modules=LORA_TARGET_MODULES,
        task_type="CAUSAL_LM",
    )

    # --- Load training data ---
    print()
    print(f" Loading training data from {TRAIN_FILE}...")
    train_dataset = load_jsonl_as_dataset(TRAIN_FILE)
    print(f" Loaded {len(train_dataset)} training examples")

    # --- Configure the trainer ---
    # SFTConfig holds all the training settings.
    # SFTTrainer is the engine that runs the actual training loop.
    print()
    print(" Training configuration:")
    print(f" Epochs: {NUM_EPOCHS}")
    print(f" Batch size: {BATCH_SIZE}")
    print(f" Grad accumulation: {GRADIENT_ACCUMULATION_STEPS} (effective batch = {BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS})")
    print(f" Learning rate: {LEARNING_RATE}")
    print(f" Max sequence length: {MAX_LENGTH}")
    print(f" Logging every: {LOGGING_STEPS} steps")
    print()

    training_args = SFTConfig(
        output_dir=ADAPTER_DIR,
        num_train_epochs=NUM_EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        learning_rate=LEARNING_RATE,
        max_length=MAX_LENGTH,
        logging_steps=LOGGING_STEPS,
        save_strategy="epoch",            # Save a checkpoint after each epoch
        bf16=use_bf16,                    # bfloat16 mixed precision when running on MPS
        gradient_checkpointing=True,      # Save memory by recomputing activations in the backward pass
        gradient_checkpointing_kwargs={"use_reentrant": False},  # More stable on MPS
        optim="adamw_torch",              # Standard PyTorch AdamW (bitsandbytes not supported on MPS)
        max_grad_norm=0.3,                # Clip gradients to prevent instability
        report_to="none",                 # Don't log to wandb or other services
    )

    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        processing_class=tokenizer,
        peft_config=peft_config,
    )

    # --- Run training ---
    print(" Training started — watch the loss decrease over time!")
    print(" (This will take ~1-1.5 hours on Apple Silicon for 3 epochs)")
    print()

    trainer.train()

    # --- Save the trained adapter ---
    trainer.save_model(ADAPTER_DIR)
    tokenizer.save_pretrained(ADAPTER_DIR)

    print()
    print(f" Training complete! Adapter saved to: {ADAPTER_DIR}/")
    print()

    return model, tokenizer


# ---------------------------------------------------------------------------
# 4. Evaluate on test set
# ---------------------------------------------------------------------------

def run_evaluation(model, tokenizer):
    """
    Evaluate the trained model on the test set.

    Computes the average loss (and perplexity) on held-out test data.
    Lower perplexity = the model is better at predicting the test data.

    Args:
        model: The fine-tuned model (or base model with the adapter loaded).
        tokenizer: The tokenizer matching ``model``.

    Returns:
        None. Results are printed; the step is skipped when TEST_FILE is missing.
    """
    print("=" * 60)
    print("STEP 3: Evaluating on test set")
    print("=" * 60)

    if not os.path.isfile(TEST_FILE):
        print(" Skipping — no test data found.")
        print()
        return

    test_dataset = load_jsonl_as_dataset(TEST_FILE)
    print(f" Loaded {len(test_dataset)} test examples")

    # Use the same config but for evaluation only
    eval_args = SFTConfig(
        output_dir=ADAPTER_DIR,
        per_device_eval_batch_size=1,
        max_length=MAX_LENGTH,
        bf16=torch.backends.mps.is_available(),
        report_to="none",
    )

    trainer = SFTTrainer(
        model=model,
        args=eval_args,
        train_dataset=test_dataset,  # Required by SFTTrainer, not used for eval
        eval_dataset=test_dataset,
        processing_class=tokenizer,
    )

    metrics = trainer.evaluate()

    # Only apply numeric formatting when a loss was actually reported;
    # formatting the "N/A" placeholder with ":.4f" would raise ValueError.
    eval_loss = metrics.get("eval_loss")
    if eval_loss is None:
        print(" Test loss: N/A")
    else:
        print(f" Test loss: {eval_loss:.4f}")
        perplexity = math.exp(eval_loss)
        print(f" Test perplexity: {perplexity:.2f}")
    print()


# ---------------------------------------------------------------------------
# 5. Quick generation test
# ---------------------------------------------------------------------------

def run_generation_test(model, tokenizer):
    """
    Classify a sample spam email to verify the adapter actually works.

    Generates a classification response for a clearly spammy test email using
    the model passed in (already fine-tuned or with the adapter loaded) and
    prints it.

    Args:
        model: The model to generate with.
        tokenizer: The tokenizer matching ``model``.
    """
    print("=" * 60)
    print("STEP 4: Quick generation test")
    print("=" * 60)
    print(" Classifying a sample spam email to verify the adapter works...")
    print()

    # A clearly spammy test email
    test_email = (
        "Congratulations!!! You have been selected as the WINNER of our "
        "$1,000,000 prize draw! Click here NOW to claim your reward. "
        "This offer expires in 24 hours. Act fast! Send your bank details "
        "to claim your prize immediately."
    )

    # Build the chat messages — same format used during training
    messages = [
        {
            "role": "system",
            "content": (
                "You are an email spam classifier. Analyze the email and classify it "
                "as SPAM or HAM. Explain your reasoning."
            ),
        },
        {
            "role": "user",
            "content": (
                "Classify this email as SPAM or HAM. Give your classification on the "
                "first line, then explain your reasoning in 2-3 sentences. Be specific "
                "about what words, patterns, or signals you noticed.\n\n"
                f"Email:\n{test_email}"
            ),
        },
    ]

    # Apply the chat template — converts messages into the format the model expects
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Tokenize and move to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Switch to inference mode so LoRA dropout is disabled. When evaluation was
    # skipped, the model is otherwise still in training mode after train().
    model.eval()

    # Generate the response
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.1,
        )

    # Decode only the NEW tokens (skip the input prompt)
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    print(f" Test email: {test_email[:80]}...")
    print()
    print(" Model response:")
    print(" " + "-" * 40)
    print(f" {response}")
    print(" " + "-" * 40)
    print()


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def load_saved_adapter():
    """Load the base model with the previously trained LoRA adapter.

    Exits the process with status 1 when ADAPTER_DIR does not exist.

    Returns:
        A ``(model, tokenizer)`` tuple with the adapter applied to the model.
    """
    print("=" * 60)
    print("STEP 2: Loading saved adapter (skipping training)")
    print("=" * 60)

    if not os.path.isdir(ADAPTER_DIR):
        print(f" ERROR: No saved adapter found at {ADAPTER_DIR}/")
        print(" Run without --eval-only first to train the model.")
        sys.exit(1)

    print(f" Loading base model: {MODEL_ID}")
    model, tokenizer = _load_base_model_and_tokenizer()

    print(f" Loading LoRA adapter from {ADAPTER_DIR}/")
    model = PeftModel.from_pretrained(model, ADAPTER_DIR)
    print(" Adapter loaded successfully!")
    print()

    return model, tokenizer


def main():
    """Run the full pipeline: prerequisites, train (or load), evaluate, generate.

    Pass ``--eval-only`` on the command line to skip training and reuse the
    adapter previously saved in ADAPTER_DIR.
    """
    eval_only = "--eval-only" in sys.argv

    print()
    print("*" * 60)
    print(" Spam Classifier — Liquid AI LoRA Fine-Tuning")
    print(" Model: LFM2.5-1.2B-Instruct")
    print(" Method: TRL SFTTrainer + PEFT LoRA")
    if eval_only:
        print(" Mode: Evaluation only (skipping training)")
    print("*" * 60)
    print()

    # Step 1: Check that everything is in place
    check_prerequisites()

    # Step 2: Train or load saved adapter
    if eval_only:
        model, tokenizer = load_saved_adapter()
    else:
        model, tokenizer = run_training()

    # Step 3: Evaluate on test set
    run_evaluation(model, tokenizer)

    # Step 4: Quick sanity check — classify a sample email
    run_generation_test(model, tokenizer)

    # Done!
    print("=" * 60)
    print(" All done!")
    if not eval_only:
        print(f" Adapter weights saved to: {ADAPTER_DIR}/")
    print()
    print(" Next step: python3 app.py")
    print("=" * 60)
    print()


if __name__ == "__main__":
    main()