Upload 4 files

Browse files

Files changed (4) hide show

examples/basic_inference.py +60 -0
examples/batch_inference.py +206 -0
examples/benchmark.py +47 -0
examples/model_info.py +142 -0

examples/basic_inference.py ADDED Viewed

	@@ -0,0 +1,60 @@

+"""
+Basic inference example for JaneGPT v2 Intent Classifier.
+"""
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from model.classifier import JaneGPTClassifier
+def main():
+    """
+    Walk through the three ways of querying the classifier and print the results.
+    Sections printed, in order: a table of single predictions, one prediction
+    that passes a context dict, and the top-3 candidates for one utterance.
+    """
+    clf = JaneGPTClassifier()
+    print(f"Model loaded: {clf}")
+    intent_count = len(clf.get_supported_intents())
+    print(f"Supported intents: {intent_count}\n")
+    # Sample utterances for the plain prediction table
+    samples = (
+        "turn up the volume",
+        "make it louder",
+        "set volume to 50",
+        "mute",
+        "turn down the brightness",
+        "open chrome",
+        "play shape of you on youtube",
+        "search for python tutorials",
+        "set a reminder for 10 minutes",
+        "take a screenshot",
+        "read this for me",
+        "explain what's on my screen",
+        "undo that",
+        "shut down",
+        "hello",
+        "what time is it",
+    )
+    table_header = f"{'Input':<45} {'Intent':<20} {'Confidence':<10}"
+    print(table_header)
+    print("-" * 75)
+    for utterance in samples:
+        label, score = clf.predict(utterance)
+        print(f"{utterance:<45} {label:<20} {score:.1%}")
+    # Same call, but with the previous intent supplied as context
+    print("\n--- Context-Aware ---")
+    previous = {"last_intent": "volume_up"}
+    label, score = clf.predict("not enough", context=previous)
+    print(f"{'not enough [after volume_up]':<45} {label:<20} {score:.1%}")
+    # Ranked candidates instead of a single winner
+    print("\n--- Top-3 Predictions ---")
+    for candidate, candidate_score in clf.predict_top_k("play something nice", k=3):
+        print(f"  {candidate}: {candidate_score:.1%}")
+if __name__ == "__main__":
+    main()

examples/batch_inference.py ADDED Viewed

	@@ -0,0 +1,206 @@

+"""
+Batch inference example for JaneGPT v2 Intent Classifier.
+Classifies multiple inputs efficiently.
+"""
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import time
+import json
+from pathlib import Path
+from typing import List, Dict
+import torch
+from model.classifier import JaneGPTClassifier
+def classify_batch(
+    classifier: JaneGPTClassifier,
+    texts: List[str],
+    context: dict = None
+) -> List[Dict]:
+    """
+    Run classifier.predict() on each text in turn and collect the outcomes.
+    Inputs are handled one at a time; nothing is padded or stacked here.
+    For a single forward pass over the whole batch, see classify_batch_parallel().
+    Args:
+        classifier: Loaded JaneGPTClassifier
+        texts: User utterances to classify
+        context: Optional context, passed unchanged to every predict() call
+    Returns:
+        One dict per input, in input order, with keys "text", "intent"
+        and "confidence" (rounded to 4 decimal places)
+    """
+    def to_record(utterance: str) -> Dict:
+        # predict() yields an (intent, confidence) pair for one utterance
+        label, score = classifier.predict(utterance, context)
+        return {
+            "text": utterance,
+            "intent": label,
+            "confidence": round(score, 4),
+        }
+    return [to_record(utterance) for utterance in texts]
+def classify_batch_parallel(
+    classifier: JaneGPTClassifier,
+    texts: List[str],
+    context: dict = None
+) -> List[Dict]:
+    """
+    Classify a batch of texts in parallel (single forward pass).
+    Every input is truncated or padded to classifier.MAX_LEN so that all
+    rows can be stacked into one tensor on classifier.device.
+    Args:
+        classifier: Loaded JaneGPTClassifier
+        texts: User utterances to classify
+        context: Optional shared context
+    Returns:
+        One dict per input, in input order, with keys "text", "intent"
+        and "confidence" (rounded to 4 decimal places). Empty input
+        returns an empty list without calling the model.
+    Raises:
+        RuntimeError: If classifier.is_ready is false
+    """
+    if not classifier.is_ready:
+        raise RuntimeError("Model not loaded")
+    # texts is walked twice (encode, then build results), so pin it down
+    # once in case the caller handed over a one-shot iterable.
+    texts = list(texts)
+    # torch.tensor([]) is 1-D with no sequence dimension; never send an
+    # empty batch to the model.
+    if not texts:
+        return []
+    max_len = classifier.MAX_LEN
+    pad_id = classifier.PAD_ID
+    # Format and tokenize all inputs, forcing each row to exactly max_len ids
+    all_ids = []
+    for text in texts:
+        formatted = classifier._format_input(text, context)
+        ids = classifier.tokenizer.encode(formatted).ids[:max_len]
+        all_ids.append(ids + [pad_id] * (max_len - len(ids)))
+    # Create batch tensor
+    batch_tensor = torch.tensor(all_ids, dtype=torch.long, device=classifier.device)
+    # Single forward pass
+    with torch.no_grad():
+        logits, _ = classifier.model(batch_tensor)
+        probs = torch.softmax(logits, dim=-1)
+        confidences, predicted = torch.max(probs, dim=-1)
+    # Convert both result tensors to Python lists in one go instead of
+    # calling .item() once per element.
+    predicted_ids = predicted.tolist()
+    confidence_values = confidences.tolist()
+    # Build results; unknown class ids fall back to 'chat'
+    return [
+        {
+            "text": text,
+            "intent": classifier.id_to_intent.get(idx, 'chat'),
+            "confidence": round(conf, 4),
+        }
+        for text, idx, conf in zip(texts, predicted_ids, confidence_values)
+    ]
+def main():
+    """
+    Classify a fixed list of commands sequentially and in one batched pass.
+    Prints per-command results and timings for both approaches, checks that
+    the predicted intents agree, writes the sequential results to
+    batch_results.json next to this script, and finishes with a batch that
+    shares a context dict.
+    """
+    # Load model
+    classifier = JaneGPTClassifier()
+    print(f"Model loaded: {classifier}\n")
+    # Example batch
+    commands = [
+        "turn up the volume",
+        "make it louder",
+        "open chrome",
+        "play shape of you",
+        "search for python tutorials on google",
+        "set brightness to 50",
+        "take a screenshot",
+        "set a reminder for 10 minutes",
+        "mute",
+        "read this for me",
+        "explain what's on my screen",
+        "undo that",
+        "shut down",
+        "hello",
+        "what can you do",
+        "close notepad",
+        "skip to the next song",
+        "dim the screen",
+        "pause the music",
+        "what time is it",
+    ]
+    # Warm-up (as benchmark.py does): keeps any first-call overhead out of
+    # the sequential timing, which would otherwise inflate the speedup figure.
+    classifier.predict(commands[0])
+    # --- Sequential processing ---
+    print("=" * 65)
+    print("  Sequential Batch Processing")
+    print("=" * 65)
+    start = time.perf_counter()
+    results = classify_batch(classifier, commands)
+    elapsed = time.perf_counter() - start
+    print(f"\n  {'Text':<42} {'Intent':<20} {'Conf':>6}")
+    print(f"  {'-'*68}")
+    for r in results:
+        print(f"  {r['text']:<42} {r['intent']:<20} {r['confidence']:>5.1%}")
+    print(f"\n  Processed {len(commands)} commands in {elapsed*1000:.1f}ms")
+    print(f"  Average: {elapsed/len(commands)*1000:.1f}ms per command")
+    # --- Parallel processing ---
+    print(f"\n{'=' * 65}")
+    print("  Parallel Batch Processing (single forward pass)")
+    print("=" * 65)
+    start = time.perf_counter()
+    results_parallel = classify_batch_parallel(classifier, commands)
+    elapsed_parallel = time.perf_counter() - start
+    print(f"\n  Processed {len(commands)} commands in {elapsed_parallel*1000:.1f}ms")
+    print(f"  Average: {elapsed_parallel/len(commands)*1000:.1f}ms per command")
+    print(f"  Speedup: {elapsed/elapsed_parallel:.1f}x faster than sequential")
+    # Verify both methods give same results (intents only; confidences are not compared)
+    match = all(
+        r1['intent'] == r2['intent']
+        for r1, r2 in zip(results, results_parallel)
+    )
+    print(f"  Results match: {'YES' if match else 'NO'}")
+    # --- Save results to JSON ---
+    # Anchor the file next to this script: the literal "examples/..." path
+    # only resolved when the working directory was the repository root.
+    output_file = Path(__file__).resolve().parent / "batch_results.json"
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(results, f, indent=2)
+    print(f"\n  Results saved to: {output_file}")
+    # --- Batch with context ---
+    print(f"\n{'=' * 65}")
+    print("  Context-Aware Batch")
+    print("=" * 65)
+    # Simulate: user just adjusted volume, now giving follow-up commands
+    context = {"last_intent": "volume_up"}
+    follow_ups = [
+        "not enough",
+        "too much",
+        "a bit more",
+        "the other one",
+        "perfect",
+    ]
+    print(f"\n  Context: last_intent = {context['last_intent']}\n")
+    ctx_results = classify_batch(classifier, follow_ups, context)
+    for r in ctx_results:
+        print(f"  {r['text']:<42} {r['intent']:<20} {r['confidence']:>5.1%}")
+if __name__ == "__main__":
+    main()

examples/benchmark.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""
+Speed benchmark for JaneGPT v2.
+"""
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import time
+from model.classifier import JaneGPTClassifier
+def main():
+    """
+    Time repeated single-utterance predictions and print throughput figures.
+    Each sample is classified once as a warm-up; the sample set is then
+    classified 100 times in a row while the wall clock is measured.
+    """
+    classifier = JaneGPTClassifier()
+    samples = [
+        "turn up the volume",
+        "open chrome",
+        "play some music",
+        "set brightness to 50",
+        "search for cats",
+        "take a screenshot",
+        "hello",
+        "undo that",
+    ]
+    rounds = 100
+    # Untimed pass over every sample before measuring
+    for utterance in samples:
+        classifier.predict(utterance)
+    # Timed run: the sample list repeated back to back, same call order as
+    # looping over the samples once per round
+    workload = samples * rounds
+    t0 = time.perf_counter()
+    for utterance in workload:
+        classifier.predict(utterance)
+    elapsed = time.perf_counter() - t0
+    total_predictions = len(workload)
+    avg_ms = elapsed / total_predictions * 1000
+    per_second = total_predictions / elapsed
+    print(f"Device: {classifier.device}")
+    print(f"Total predictions: {total_predictions}")
+    print(f"Total time: {elapsed:.2f}s")
+    print(f"Average per prediction: {avg_ms:.2f}ms")
+    print(f"Predictions per second: {per_second:.0f}")
+if __name__ == "__main__":
+    main()

examples/model_info.py ADDED Viewed

	@@ -0,0 +1,142 @@

+"""
+Display detailed information about JaneGPT v2 model.
+Shows architecture, parameters, training info, and size comparisons.
+"""
+import os
+import sys
+# Make the repository root importable when this file is run directly,
+# matching the other example scripts.
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import torch
+from model.architecture import JaneGPTv2Classifier, INTENT_LABELS
+def main():
+    """
+    Print a report about the JaneGPT v2 checkpoint.
+    Loads the checkpoint, rebuilds the model from the stored config (using
+    built-in defaults for any missing key), loads the weights, and prints
+    architecture, parameter counts, training metrics, intent labels, file
+    sizes and a size comparison table.
+    """
+    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    def locate(relative_path):
+        """Return relative_path if it exists from the CWD, else the same path under the repository root."""
+        if os.path.exists(relative_path):
+            return relative_path
+        return os.path.join(repo_root, relative_path)
+    # Load checkpoint
+    checkpoint_path = locate("weights/janegpt_v2_classifier.pt")
+    # NOTE(security): weights_only=False unpickles arbitrary objects, so only
+    # load checkpoints that come from a trusted source.
+    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)
+    stored_config = checkpoint.get('config', {})
+    # Resolve every hyperparameter exactly once so the model that is built
+    # and the values that are printed cannot drift apart.
+    defaults = {
+        'vocab_size': 8192,
+        'embed_dim': 256,
+        'num_heads': 8,
+        'num_kv_heads': 4,
+        'num_layers': 8,
+        'ff_hidden': 672,
+        'max_seq_len': 256,
+        'dropout': 0.1,
+        'rope_theta': 10000.0,
+    }
+    hp = {key: stored_config.get(key, default) for key, default in defaults.items()}
+    # Create model
+    model = JaneGPTv2Classifier(**hp)
+    model.load_state_dict(checkpoint['model_state_dict'])
+    # Calculate parameters
+    total_params = sum(p.numel() for p in model.parameters())
+    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    buffers = sum(b.numel() for b in model.buffers())
+    print("=" * 60)
+    print("  JANEGPT v2 - MODEL INFORMATION")
+    print("=" * 60)
+    # Architecture
+    print("\n  ARCHITECTURE")
+    print(f"    Type:               Decoder-only Transformer (Classifier)")
+    print(f"    Vocab Size:         {hp['vocab_size']:,}")
+    print(f"    Embedding Dim:      {hp['embed_dim']}")
+    print(f"    Attention Heads:    {hp['num_heads']}")
+    print(f"    KV Heads (GQA):     {hp['num_kv_heads']}")
+    print(f"    Head Dim:           {hp['embed_dim'] // hp['num_heads']}")
+    print(f"    Layers:             {hp['num_layers']}")
+    print(f"    FF Hidden:          {hp['ff_hidden']}")
+    print(f"    Max Seq Length:     {hp['max_seq_len']}")
+    print(f"    Dropout:            {hp['dropout']}")
+    print(f"    RoPE Theta:         {hp['rope_theta']}")
+    # Features
+    print("\n  FEATURES")
+    print(f"    Position Encoding:  RoPE (Rotary Position Embedding)")
+    print(f"    Normalization:      RMSNorm")
+    print(f"    Attention:          Grouped Query Attention (GQA)")
+    print(f"    Feed-Forward:       SwiGLU")
+    print(f"    Classifier Head:    Linear -> GELU -> Dropout -> Linear")
+    print(f"    Output Classes:     {len(INTENT_LABELS)}")
+    # Parameters
+    print("\n  PARAMETERS")
+    print(f"    Total Parameters:       {total_params:>12,}")
+    print(f"    Trainable Parameters:   {trainable_params:>12,}")
+    print(f"    Non-trainable Buffers:  {buffers:>12,}")
+    print(f"    Model Size (float32):   {total_params * 4 / 1024 / 1024:.2f} MB")
+    print(f"    Model Size (float16):   {total_params * 2 / 1024 / 1024:.2f} MB")
+    # Breakdown
+    print("\n  PARAMETER BREAKDOWN")
+    print(f"    {'Component':<35} {'Params':>12} {'%':>8}")
+    print(f"    {'-' * 55}")
+    emb_params = sum(p.numel() for p in model.token_embedding.parameters())
+    print(f"    {'Token Embedding':<35} {emb_params:>12,} {emb_params/total_params*100:>7.1f}%")
+    all_layers_params = sum(p.numel() for p in model.layers.parameters())
+    print(f"    {'Transformer Layers (total)':<35} {all_layers_params:>12,} {all_layers_params/total_params*100:>7.1f}%")
+    # Single layer breakdown
+    first_layer = model.layers[0]
+    layer0_params = sum(p.numel() for p in first_layer.parameters())
+    # parameters() never yields buffers, so nothing is subtracted here;
+    # subtracting buffer sizes would undercount the attention weights.
+    attn_params = sum(p.numel() for p in first_layer.attn.parameters())
+    ff_params = sum(p.numel() for p in first_layer.ff.parameters())
+    norm_params = first_layer.norm1.weight.numel() + first_layer.norm2.weight.numel()
+    # Layer count comes from the resolved config rather than a literal "x8"
+    per_layer_label = f"  Per layer (x{hp['num_layers']}):"
+    print(f"      {per_layer_label:<33} {layer0_params:>12,}")
+    print(f"      {'    Attention (Q/K/V/Out)':<33} {attn_params:>12,}")
+    print(f"      {'    Feed-Forward (SwiGLU)':<33} {ff_params:>12,}")
+    print(f"      {'    Norms (RMSNorm x2)':<33} {norm_params:>12,}")
+    final_norm_params = model.norm.weight.numel()
+    print(f"    {'Final RMSNorm':<35} {final_norm_params:>12,} {final_norm_params/total_params*100:>7.1f}%")
+    head_params = sum(p.numel() for p in model.intent_head.parameters())
+    print(f"    {'Classification Head':<35} {head_params:>12,} {head_params/total_params*100:>7.1f}%")
+    # Read the head's linear layers off the model itself so the sub-rows
+    # stay correct for checkpoints with a different width or label count.
+    for module in model.intent_head.modules():
+        if not isinstance(module, torch.nn.Linear):
+            continue
+        linear_label = f"  Linear({module.in_features}, {module.out_features})"
+        if module.bias is not None:
+            linear_label += " + bias"
+        linear_params = sum(p.numel() for p in module.parameters())
+        print(f"      {linear_label:<33} {linear_params:>12,}")
+    # Training
+    print("\n  TRAINING")
+    print(f"    Best Val Accuracy:  {checkpoint.get('val_acc', 0):.2f}%")
+    print(f"    Best Val Loss:      {checkpoint.get('val_loss', 0):.4f}")
+    print(f"    Best Epoch:         {checkpoint.get('epoch', 'N/A')}")
+    # Intent classes
+    print(f"\n  INTENT CLASSES ({len(INTENT_LABELS)})")
+    for i, label in enumerate(INTENT_LABELS):
+        print(f"    {i:>2}: {label}")
+    # File info
+    print(f"\n  FILES")
+    if os.path.exists(checkpoint_path):
+        model_size = os.path.getsize(checkpoint_path)
+        print(f"    Checkpoint:   {model_size / 1024 / 1024:.2f} MB")
+    tokenizer_path = locate("weights/tokenizer.json")
+    if os.path.exists(tokenizer_path):
+        tok_size = os.path.getsize(tokenizer_path)
+        print(f"    Tokenizer:    {tok_size / 1024:.1f} KB")
+    # Size comparison (reference rows are fixed strings, not computed)
+    print(f"\n  SIZE COMPARISON")
+    print(f"    {'Model':<30} {'Parameters':>15} {'Size':>10}")
+    print(f"    {'-' * 55}")
+    print(f"    {'JaneGPT v2 (this model)':<30} {total_params:>12,}   {total_params * 4 / 1024 / 1024:>5.1f} MB")
+    print(f"    {'DistilBERT':<30} {'66,000,000':>15} {'260.0 MB':>10}")
+    print(f"    {'BERT Base':<30} {'110,000,000':>15} {'440.0 MB':>10}")
+    print(f"    {'GPT-2 Small':<30} {'124,000,000':>15} {'500.0 MB':>10}")
+    print(f"    {'Llama 3 8B':<30} {'8,000,000,000':>15} {'  16.0 GB':>10}")
+    print(f"    {'GPT-4':<30} {'~1,800,000,000,000':>15} {'~  3.6 TB':>10}")
+    print(f"\n  Created by: Ravindu Senanayake")
+    print("=" * 60)
+if __name__ == "__main__":
+    main()