{ "release": "IrishCore-GlobalPointer-135M-v1-rc3", "base_model": "OpenMed/OpenMed-PII-mLiteClinical-Base-135M-v1", "public_references": { "closest_raw_only_release": "temsa/IrishCore-DiffMask-135M-v1-rc6", "closest_openmed_release": "temsa/OpenMed-mLiteClinical-IrishCorePII-135M-v2-rc8" }, "task": "Irish core PII detection and masking in English and Irish Gaelic", "coverage": [ "ACCOUNT_NUMBER", "BANK_ROUTING_NUMBER", "CREDIT_DEBIT_CARD", "EMAIL", "FIRST_NAME", "LAST_NAME", "PASSPORT_NUMBER", "PHONE_NUMBER", "POSTCODE", "PPSN", "SWIFT_BIC" ], "architecture": { "family": "DistilBERT-size GlobalPointer span extractor", "raw_only": true, "scanner_free": true, "validator_free": true, "head": "typed span matrix", "uses_rope": true }, "training_data": { "published": [ "temsa/OpenMed-Irish-CorePII-TrainMix-v1", "temsa/OpenMed-Irish-PPSN-Eircode-Spec-v1", "joelniklaus/mapa", "gretelai/synthetic_pii_finance_multilingual" ], "local_selection_mix": "irish_core_globalpointer_focus_chatbot_v3", "selection_note": "The published checkpoint was selected after comparing span-head variants against Irish core, UAT replay, hardening, and fresh holdout suites." }, "training_recipe": { "init_mode": "full_global_pointer", "initialized_encoder_from": "local DiffMask rc6 family checkpoint", "freeze_layers": 2, "max_length": 128, "span_positive_weight": 8.0, "head_size": 64, "use_rope": true, "negative_ratio": 16, "min_negatives": 256, "loss": "positive-weighted BCE over upper-triangular spans with hard-negative mining" }, "references": [ { "title": "DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter", "url": "https://arxiv.org/abs/1910.01108" }, { "title": "Global Pointer: Novel Efficient Span-based Approach for Named Entity Recognition", "url": "https://arxiv.org/abs/2208.03054" }, { "title": "RoFormer: Enhanced Transformer with Rotary Position Embedding", "url": "https://arxiv.org/abs/2104.09864" } ] }