| { |
| "model_key": "sentinel-mb-c-d11", |
| "encoder_model": "answerdotai/ModernBERT-base", |
| "encoder_params_millions": 149.7, |
| "head_type": "columnar", |
| "head_code": "c", |
| "head_variant": "d11", |
| "head_dropout": 0.1, |
| "head_div": 1, |
| "head_mul": 1, |
| "head_skip": true, |
| "head_architecture": "funnel", |
| "model_family": "modernbert-base", |
| "projection_size": 640, |
| "trainable_head_params": 14325653, |
| "dataset_counts": { |
| "train": 900, |
| "dev": 150, |
| "test": 150 |
| }, |
| "dataset_signature": { |
| "generator_version": "2026-04-07-final-audit-clear-v1", |
| "counts": { |
| "train": 900, |
| "dev": 150, |
| "test": 150 |
| }, |
| "distribution": { |
| "train": { |
| "risky": 603, |
| "clean": 297 |
| }, |
| "dev": { |
| "risky": 142, |
| "clean": 8 |
| }, |
| "test": { |
| "risky": 142, |
| "clean": 8 |
| } |
| } |
| }, |
| "label_groups": { |
| "severity": [ |
| "sev_0_compliant_or_ok", |
| "sev_1_minor", |
| "sev_2_moderate", |
| "sev_3_high" |
| ], |
| "domain": [ |
| "performance_claims_forecasting", |
| "investment_advice_suitability", |
| "conflicts_inducements", |
| "marketing_solicitation_advertising", |
| "selective_disclosure_fair_access", |
| "mnpi_insider_trading", |
| "recordkeeping_supervision", |
| "ai_automation_capability_claims", |
| "privacy_confidentiality", |
| "cybersecurity_internal_controls", |
| "employment_favoritism_role_conflict", |
| "aml_and_suspicious_activity", |
| "other_unknown" |
| ], |
| "subtype": [ |
| "speculative_outcomes_unqualified", |
| "implicit_or_explicit_guarantee", |
| "risk_context_omitted_or_unbalanced", |
| "unregistered_personalized_investment_advice", |
| "undisclosed_economic_conflict_or_referral", |
| "pressure_or_coercion", |
| "selective_disclosure", |
| "mnpi_misuse_or_encouragement", |
| "recordkeeping_or_preapproval_evasion", |
| "ai_autonomy_or_safety_overstatement", |
| "credentials_validation_or_compliance_misrepresentation", |
| "confidential_data_leakage", |
| "internal_controls_or_exception_process_leakage", |
| "academic_commercial_role_blurring_or_quid_pro_quo", |
| "improper_solicitation_offering_pressure", |
| "excessive_trading_or_account_churning", |
| "product_switching_without_cost_benefit_analysis", |
| "dual_registrant_capacity_or_wrap_fee_conflict_confusion", |
| "elder_exploitation_or_vulnerable_client_signal", |
| "suspicious_activity_indicator_or_structuring", |
| "influencer_or_social_media_promotion_compliance_failure", |
| "crypto_asset_misrepresentation_or_inadequate_disclosure", |
| "other_unknown" |
| ], |
| "jurisdiction": [ |
| "US", |
| "EU", |
| "UK", |
| "Other", |
| "Unknown" |
| ], |
| "why": [ |
| "forward_looking_statement_unqualified", |
| "guarantee_or_assurance_language", |
| "omits_material_risk_or_downside", |
| "implies_downside_protection_or_no_drawdown", |
| "cherry_picks_performance_period", |
| "omits_performance_methodology_or_gross_net_context", |
| "personalized_trade_or_allocation_recommendation", |
| "timing_or_sizing_guidance", |
| "creates_implied_advisory_relationship", |
| "conflict_not_disclosed", |
| "referral_relationship_not_disclosed", |
| "omits_fees_costs_or_reasonably_available_alternatives", |
| "selective_private_performance_or_fundraising_update", |
| "off_the_record_or_not_in_writing_language", |
| "mnpi_possession_indicated", |
| "encourages_action_before_public_release", |
| "avoid_recordkeeping_channel_shift", |
| "bypasses_required_preapproval", |
| "pressure_scarcity_urgency", |
| "unsubstantiated_social_proof_or_validation", |
| "omits_testimonial_endorsement_or_rating_disclosure", |
| "obscures_required_disclosure_or_form_crs", |
| "minimizes_need_for_diligence_or_compliance", |
| "overstates_ai_capability_or_removes_human_oversight", |
| "claims_compliance_risk_eliminated", |
| "shares_sensitive_personal_or_financial_data", |
| "violates_need_to_know_data_minimization", |
| "shares_sensitive_internal_controls_or_exceptions", |
| "role_power_imbalance_or_favoritism", |
| "excessive_trading_cost_to_equity", |
| "inadequate_customer_profile_or_suitability_basis", |
| "exploits_vulnerable_or_elderly_client", |
| "aml_suspicious_activity_indicator", |
| "omits_switching_costs_and_product_comparison", |
| "conflict_language_understates_actual_relationship", |
| "omits_influencer_compensation_or_affiliation_disclosure", |
| "misrepresents_sipc_or_regulatory_protection_for_crypto", |
| "data_breach_notification_obligation_triggered", |
| "impedes_regulatory_reporting_or_whistleblower_rights" |
| ], |
| "impacted_principles": [ |
| "truthful_non_misleading_communications", |
| "balanced_risk_reward_presentation", |
| "no_performance_guarantees_or_promissory_language", |
| "registration_and_scope_of_advice", |
| "duty_of_loyalty_conflict_disclosure", |
| "fair_access_to_material_information", |
| "insider_trading_and_mnpi_controls", |
| "supervision_and_books_records", |
| "privacy_confidentiality_and_secure_handling", |
| "security_control_integrity", |
| "role_separation_and_fair_access_in_academia", |
| "non_coercion_and_no_undue_influence", |
| "accurate_ai_capability_and_human_oversight", |
| "client_vulnerability_and_exploitation_prevention", |
| "aml_and_sanctions_compliance" |
| ], |
| "remediation_actions": [ |
| "add_forward_looking_disclaimer", |
| "reframe_as_scenarios_not_expectations", |
| "add_balanced_risk_and_downside_section", |
| "remove_or_soften_guarantee_language", |
| "remove_personalized_recommendations", |
| "add_registered_advice_boundary_language", |
| "disclose_conflicts_and_compensation", |
| "add_fees_costs_and_alternatives_comparison", |
| "use_standardized_approved_performance_materials", |
| "add_performance_methodology_and_gross_net_context", |
| "avoid_selective_disclosure_share_broadly", |
| "escalate_mnpi_to_compliance_and_halt", |
| "keep_discussion_on_retained_channels", |
| "require_formal_preapproval_before_send", |
| "remove_pressure_scarcity_and_use_factual_timeline", |
| "substantiation_or_remove_credibility_claims", |
| "add_testimonial_endorsement_and_rating_disclosure", |
| "make_required_disclosure_clear_and_prominent", |
| "avoid_minimizing_compliance_or_diligence", |
| "clarify_ai_is_assistive_with_human_review", |
| "remove_claims_that_ai_eliminates_risk", |
| "redact_and_minimize_sensitive_data", |
| "use_secure_transfer_and_limit_access", |
| "avoid_sharing_internal_controls_or_sanitize", |
| "route_academic_opportunities_through_institution", |
| "separate_recommendation_letters_from_work", |
| "assess_cost_to_equity_against_client_profile", |
| "flag_for_elder_exploitation_review_and_hold", |
| "assess_sar_filing_obligation_and_escalate", |
| "initiate_breach_notification_review_and_timeline", |
| "remove_provisions_impeding_regulatory_communications" |
| ] |
| }, |
| "metadata_groups": { |
| "content_type": [ |
| "email", |
| "message" |
| ], |
| "audience_segment": [ |
| "client", |
| "internal", |
| "prospect_or_investor", |
| "public", |
| "third_party" |
| ], |
| "detection_difficulty": [ |
| "obvious", |
| "moderate", |
| "subtle" |
| ], |
| "aggravating_factors": [ |
| "intentional", |
| "reckless", |
| "negligent", |
| "concealment_present", |
| "customer_harm_potential", |
| "financial_benefit_to_respondent", |
| "vulnerable_client", |
| "pattern_or_duration" |
| ] |
| }, |
| "output_signature": { |
| "violation": { |
| "type": "binary" |
| }, |
| "severity": { |
| "type": "multiclass", |
| "labels": [ |
| "sev_0_compliant_or_ok", |
| "sev_1_minor", |
| "sev_2_moderate", |
| "sev_3_high" |
| ] |
| }, |
| "domain": { |
| "type": "multiclass", |
| "labels": [ |
| "performance_claims_forecasting", |
| "investment_advice_suitability", |
| "conflicts_inducements", |
| "marketing_solicitation_advertising", |
| "selective_disclosure_fair_access", |
| "mnpi_insider_trading", |
| "recordkeeping_supervision", |
| "ai_automation_capability_claims", |
| "privacy_confidentiality", |
| "cybersecurity_internal_controls", |
| "employment_favoritism_role_conflict", |
| "aml_and_suspicious_activity", |
| "other_unknown" |
| ] |
| }, |
| "subtype": { |
| "type": "multiclass", |
| "labels": [ |
| "speculative_outcomes_unqualified", |
| "implicit_or_explicit_guarantee", |
| "risk_context_omitted_or_unbalanced", |
| "unregistered_personalized_investment_advice", |
| "undisclosed_economic_conflict_or_referral", |
| "pressure_or_coercion", |
| "selective_disclosure", |
| "mnpi_misuse_or_encouragement", |
| "recordkeeping_or_preapproval_evasion", |
| "ai_autonomy_or_safety_overstatement", |
| "credentials_validation_or_compliance_misrepresentation", |
| "confidential_data_leakage", |
| "internal_controls_or_exception_process_leakage", |
| "academic_commercial_role_blurring_or_quid_pro_quo", |
| "improper_solicitation_offering_pressure", |
| "excessive_trading_or_account_churning", |
| "product_switching_without_cost_benefit_analysis", |
| "dual_registrant_capacity_or_wrap_fee_conflict_confusion", |
| "elder_exploitation_or_vulnerable_client_signal", |
| "suspicious_activity_indicator_or_structuring", |
| "influencer_or_social_media_promotion_compliance_failure", |
| "crypto_asset_misrepresentation_or_inadequate_disclosure", |
| "other_unknown" |
| ] |
| }, |
| "jurisdiction": { |
| "type": "multiclass", |
| "labels": [ |
| "US", |
| "EU", |
| "UK", |
| "Other", |
| "Unknown" |
| ] |
| }, |
| "why": { |
| "type": "multilabel", |
| "labels": [ |
| "forward_looking_statement_unqualified", |
| "guarantee_or_assurance_language", |
| "omits_material_risk_or_downside", |
| "implies_downside_protection_or_no_drawdown", |
| "cherry_picks_performance_period", |
| "omits_performance_methodology_or_gross_net_context", |
| "personalized_trade_or_allocation_recommendation", |
| "timing_or_sizing_guidance", |
| "creates_implied_advisory_relationship", |
| "conflict_not_disclosed", |
| "referral_relationship_not_disclosed", |
| "omits_fees_costs_or_reasonably_available_alternatives", |
| "selective_private_performance_or_fundraising_update", |
| "off_the_record_or_not_in_writing_language", |
| "mnpi_possession_indicated", |
| "encourages_action_before_public_release", |
| "avoid_recordkeeping_channel_shift", |
| "bypasses_required_preapproval", |
| "pressure_scarcity_urgency", |
| "unsubstantiated_social_proof_or_validation", |
| "omits_testimonial_endorsement_or_rating_disclosure", |
| "obscures_required_disclosure_or_form_crs", |
| "minimizes_need_for_diligence_or_compliance", |
| "overstates_ai_capability_or_removes_human_oversight", |
| "claims_compliance_risk_eliminated", |
| "shares_sensitive_personal_or_financial_data", |
| "violates_need_to_know_data_minimization", |
| "shares_sensitive_internal_controls_or_exceptions", |
| "role_power_imbalance_or_favoritism", |
| "excessive_trading_cost_to_equity", |
| "inadequate_customer_profile_or_suitability_basis", |
| "exploits_vulnerable_or_elderly_client", |
| "aml_suspicious_activity_indicator", |
| "omits_switching_costs_and_product_comparison", |
| "conflict_language_understates_actual_relationship", |
| "omits_influencer_compensation_or_affiliation_disclosure", |
| "misrepresents_sipc_or_regulatory_protection_for_crypto", |
| "data_breach_notification_obligation_triggered", |
| "impedes_regulatory_reporting_or_whistleblower_rights" |
| ] |
| }, |
| "impacted_principles": { |
| "type": "multilabel", |
| "labels": [ |
| "truthful_non_misleading_communications", |
| "balanced_risk_reward_presentation", |
| "no_performance_guarantees_or_promissory_language", |
| "registration_and_scope_of_advice", |
| "duty_of_loyalty_conflict_disclosure", |
| "fair_access_to_material_information", |
| "insider_trading_and_mnpi_controls", |
| "supervision_and_books_records", |
| "privacy_confidentiality_and_secure_handling", |
| "security_control_integrity", |
| "role_separation_and_fair_access_in_academia", |
| "non_coercion_and_no_undue_influence", |
| "accurate_ai_capability_and_human_oversight", |
| "client_vulnerability_and_exploitation_prevention", |
| "aml_and_sanctions_compliance" |
| ] |
| }, |
| "remediation_actions": { |
| "type": "multilabel", |
| "labels": [ |
| "add_forward_looking_disclaimer", |
| "reframe_as_scenarios_not_expectations", |
| "add_balanced_risk_and_downside_section", |
| "remove_or_soften_guarantee_language", |
| "remove_personalized_recommendations", |
| "add_registered_advice_boundary_language", |
| "disclose_conflicts_and_compensation", |
| "add_fees_costs_and_alternatives_comparison", |
| "use_standardized_approved_performance_materials", |
| "add_performance_methodology_and_gross_net_context", |
| "avoid_selective_disclosure_share_broadly", |
| "escalate_mnpi_to_compliance_and_halt", |
| "keep_discussion_on_retained_channels", |
| "require_formal_preapproval_before_send", |
| "remove_pressure_scarcity_and_use_factual_timeline", |
| "substantiation_or_remove_credibility_claims", |
| "add_testimonial_endorsement_and_rating_disclosure", |
| "make_required_disclosure_clear_and_prominent", |
| "avoid_minimizing_compliance_or_diligence", |
| "clarify_ai_is_assistive_with_human_review", |
| "remove_claims_that_ai_eliminates_risk", |
| "redact_and_minimize_sensitive_data", |
| "use_secure_transfer_and_limit_access", |
| "avoid_sharing_internal_controls_or_sanitize", |
| "route_academic_opportunities_through_institution", |
| "separate_recommendation_letters_from_work", |
| "assess_cost_to_equity_against_client_profile", |
| "flag_for_elder_exploitation_review_and_hold", |
| "assess_sar_filing_obligation_and_escalate", |
| "initiate_breach_notification_review_and_timeline", |
| "remove_provisions_impeding_regulatory_communications" |
| ] |
| }, |
| "content_type": { |
| "type": "multiclass", |
| "labels": [ |
| "email", |
| "message" |
| ] |
| }, |
| "audience_segment": { |
| "type": "multiclass", |
| "labels": [ |
| "client", |
| "internal", |
| "prospect_or_investor", |
| "public", |
| "third_party" |
| ] |
| }, |
| "detection_difficulty": { |
| "type": "multiclass", |
| "labels": [ |
| "obvious", |
| "moderate", |
| "subtle" |
| ] |
| }, |
| "aggravating_factors": { |
| "type": "multilabel", |
| "labels": [ |
| "intentional", |
| "reckless", |
| "negligent", |
| "concealment_present", |
| "customer_harm_potential", |
| "financial_benefit_to_respondent", |
| "vulnerable_client", |
| "pattern_or_duration" |
| ] |
| } |
| }, |
| "device_info": { |
| "device": "cuda", |
| "torch_cuda_version": "11.8", |
| "gpu_count": 1, |
| "gpu_name": "NVIDIA GeForce RTX 2080 Ti", |
| "gpu_memory_gb": 11.0, |
| "gpu_capability": "7.5", |
| "nvidia_smi": [ |
| "NVIDIA GeForce RTX 2080 Ti, 591.74, 11264 MiB" |
| ] |
| }, |
| "timings": { |
| "encoding_seconds": 0.0, |
| "training_seconds": 82.25, |
| "total_seconds": 86.5 |
| }, |
| "cycles": [ |
| { |
| "loss": 7.7748064517974855, |
| "violation_accuracy": 0.9933333333333333, |
| "violation_precision": 1.0, |
| "violation_recall": 0.9929577464788732, |
| "violation_f1": 0.9964664310954063, |
| "severity_accuracy": 0.68, |
| "severity_precision_macro": 0.6277301315037164, |
| "severity_recall_macro": 0.707563025210084, |
| "severity_f1_macro": 0.6574701673088821, |
| "domain_accuracy": 0.8466666666666667, |
| "domain_precision_macro": 0.8728019516325967, |
| "domain_recall_macro": 0.8525462962962963, |
| "domain_f1_macro": 0.8494220062066961, |
| "subtype_accuracy": 0.7733333333333333, |
| "subtype_precision_macro": 0.8113931523022433, |
| "subtype_recall_macro": 0.7490964843237571, |
| "subtype_f1_macro": 0.7508012065714375, |
| "jurisdiction_accuracy": 0.6933333333333334, |
| "jurisdiction_precision_macro": 0.39713131313131317, |
| "jurisdiction_recall_macro": 0.4161038961038961, |
| "jurisdiction_f1_macro": 0.3923395902343271, |
| "why_precision_micro": 0.5229591836734694, |
| "why_precision_macro": 0.5766391767639499, |
| "why_recall_micro": 0.779467680608365, |
| "why_recall_macro": 0.7375322683014991, |
| "why_f1_micro": 0.6259541984732825, |
| "why_f1_macro": 0.6266278969973275, |
| "impacted_principles_precision_micro": 0.714859437751004, |
| "impacted_principles_precision_macro": 0.7255202728514017, |
| "impacted_principles_recall_micro": 0.8127853881278538, |
| "impacted_principles_recall_macro": 0.7848574654881022, |
| "impacted_principles_f1_micro": 0.7606837606837606, |
| "impacted_principles_f1_macro": 0.7439683431383844, |
| "remediation_actions_precision_micro": 0.6126760563380281, |
| "remediation_actions_precision_macro": 0.5838607852720756, |
| "remediation_actions_recall_micro": 0.7733333333333333, |
| "remediation_actions_recall_macro": 0.7043368620792969, |
| "remediation_actions_f1_micro": 0.6836935166994106, |
| "remediation_actions_f1_macro": 0.6242740328903318, |
| "content_type_accuracy": 1.0, |
| "content_type_precision_macro": 1.0, |
| "content_type_recall_macro": 1.0, |
| "content_type_f1_macro": 1.0, |
| "audience_segment_accuracy": 1.0, |
| "audience_segment_precision_macro": 1.0, |
| "audience_segment_recall_macro": 1.0, |
| "audience_segment_f1_macro": 1.0, |
| "detection_difficulty_accuracy": 0.44666666666666666, |
| "detection_difficulty_precision_macro": 0.4471819645732689, |
| "detection_difficulty_recall_macro": 0.46915306915306915, |
| "detection_difficulty_f1_macro": 0.4404195664321677, |
| "aggravating_factors_precision_micro": 0.5650793650793651, |
| "aggravating_factors_precision_macro": 0.550085885667087, |
| "aggravating_factors_recall_micro": 0.7574468085106383, |
| "aggravating_factors_recall_macro": 0.7552521514727553, |
| "aggravating_factors_f1_micro": 0.6472727272727272, |
| "aggravating_factors_f1_macro": 0.6306325855261203, |
| "stage_a_selection_score": 0.7456116562791146, |
| "selection_score": 0.7500419326212061, |
| "scenario_key_count": 150, |
| "rows_per_scenario_min": 1, |
| "rows_per_scenario_median": 1.0, |
| "rows_per_scenario_max": 1, |
| "violation_accuracy_scenario_macro": 0.9933333333333333, |
| "violation_accuracy_scenario_macro_risky": 0.9929577464788732, |
| "violation_accuracy_scenario_macro_clean": 1.0, |
| "violation_accuracy_scenario_min": 0.0, |
| "violation_worst_scenario_key": "train_1371", |
| "violation_worst_scenario_label": "risky", |
| "cycle": 1, |
| "best_epoch": 15, |
| "epochs_ran": 21, |
| "lr": 0.003, |
| "head_dropout": 0.1, |
| "weight_decay": 0.01, |
| "cycle_seconds": 20.43 |
| }, |
| { |
| "loss": 11.207931518554688, |
| "violation_accuracy": 0.9933333333333333, |
| "violation_precision": 1.0, |
| "violation_recall": 0.9929577464788732, |
| "violation_f1": 0.9964664310954063, |
| "severity_accuracy": 0.7133333333333334, |
| "severity_precision_macro": 0.5736714975845411, |
| "severity_recall_macro": 0.5810399159663866, |
| "severity_f1_macro": 0.577203237410072, |
| "domain_accuracy": 0.8733333333333333, |
| "domain_precision_macro": 0.9152304502304504, |
| "domain_recall_macro": 0.9037037037037038, |
| "domain_f1_macro": 0.8981829715276235, |
| "subtype_accuracy": 0.82, |
| "subtype_precision_macro": 0.8295979273252001, |
| "subtype_recall_macro": 0.8100452577725306, |
| "subtype_f1_macro": 0.8046637752590468, |
| "jurisdiction_accuracy": 0.6933333333333334, |
| "jurisdiction_precision_macro": 0.41350649350649354, |
| "jurisdiction_recall_macro": 0.4179220779220779, |
| "jurisdiction_f1_macro": 0.4076005906238464, |
| "why_precision_micro": 0.6041666666666666, |
| "why_precision_macro": 0.5994839193351778, |
| "why_recall_micro": 0.7718631178707225, |
| "why_recall_macro": 0.7338144761221683, |
| "why_f1_micro": 0.67779632721202, |
| "why_f1_macro": 0.6489732285249087, |
| "impacted_principles_precision_micro": 0.7204724409448819, |
| "impacted_principles_precision_macro": 0.7509759521524227, |
| "impacted_principles_recall_micro": 0.8356164383561644, |
| "impacted_principles_recall_macro": 0.7889626527134941, |
| "impacted_principles_f1_micro": 0.773784355179704, |
| "impacted_principles_f1_macro": 0.7575593568585874, |
| "remediation_actions_precision_micro": 0.6105263157894737, |
| "remediation_actions_precision_macro": 0.5976390453783973, |
| "remediation_actions_recall_micro": 0.7733333333333333, |
| "remediation_actions_recall_macro": 0.690795299444056, |
| "remediation_actions_f1_micro": 0.6823529411764706, |
| "remediation_actions_f1_macro": 0.6264413385705756, |
| "content_type_accuracy": 1.0, |
| "content_type_precision_macro": 1.0, |
| "content_type_recall_macro": 1.0, |
| "content_type_f1_macro": 1.0, |
| "audience_segment_accuracy": 1.0, |
| "audience_segment_precision_macro": 1.0, |
| "audience_segment_recall_macro": 1.0, |
| "audience_segment_f1_macro": 1.0, |
| "detection_difficulty_accuracy": 0.41333333333333333, |
| "detection_difficulty_precision_macro": 0.4076248313090418, |
| "detection_difficulty_recall_macro": 0.4146464646464647, |
| "detection_difficulty_f1_macro": 0.41032213795594075, |
| "aggravating_factors_precision_micro": 0.6544715447154471, |
| "aggravating_factors_precision_macro": 0.6429940120546376, |
| "aggravating_factors_recall_micro": 0.6851063829787234, |
| "aggravating_factors_recall_macro": 0.6755889259203152, |
| "aggravating_factors_f1_micro": 0.6694386694386694, |
| "aggravating_factors_f1_macro": 0.6555896631040743, |
| "stage_a_selection_score": 0.767369221062852, |
| "selection_score": 0.7671909669032824, |
| "scenario_key_count": 150, |
| "rows_per_scenario_min": 1, |
| "rows_per_scenario_median": 1.0, |
| "rows_per_scenario_max": 1, |
| "violation_accuracy_scenario_macro": 0.9933333333333333, |
| "violation_accuracy_scenario_macro_risky": 0.9929577464788732, |
| "violation_accuracy_scenario_macro_clean": 1.0, |
| "violation_accuracy_scenario_min": 0.0, |
| "violation_worst_scenario_key": "train_1371", |
| "violation_worst_scenario_label": "risky", |
| "cycle": 2, |
| "best_epoch": 28, |
| "epochs_ran": 34, |
| "lr": 0.001, |
| "head_dropout": 0.1, |
| "weight_decay": 0.0, |
| "cycle_seconds": 32.74 |
| }, |
| { |
| "loss": 8.636670589447021, |
| "violation_accuracy": 0.9866666666666667, |
| "violation_precision": 0.9929577464788732, |
| "violation_recall": 0.9929577464788732, |
| "violation_f1": 0.9929577464788732, |
| "severity_accuracy": 0.66, |
| "severity_precision_macro": 0.4967107870333677, |
| "severity_recall_macro": 0.5516281512605042, |
| "severity_f1_macro": 0.5189861673414305, |
| "domain_accuracy": 0.8666666666666667, |
| "domain_precision_macro": 0.8598119380377445, |
| "domain_recall_macro": 0.9199074074074075, |
| "domain_f1_macro": 0.8685643227768131, |
| "subtype_accuracy": 0.7866666666666666, |
| "subtype_precision_macro": 0.8163114663114664, |
| "subtype_recall_macro": 0.775937950937951, |
| "subtype_f1_macro": 0.7646749863327403, |
| "jurisdiction_accuracy": 0.7733333333333333, |
| "jurisdiction_precision_macro": 0.5372131147540984, |
| "jurisdiction_recall_macro": 0.4397402597402597, |
| "jurisdiction_f1_macro": 0.47636711947056776, |
| "why_precision_micro": 0.4528301886792453, |
| "why_precision_macro": 0.48296837652041275, |
| "why_recall_micro": 0.8212927756653993, |
| "why_recall_macro": 0.7714144117990271, |
| "why_f1_micro": 0.5837837837837838, |
| "why_f1_macro": 0.5733734446440217, |
| "impacted_principles_precision_micro": 0.6332179930795848, |
| "impacted_principles_precision_macro": 0.6573170966740058, |
| "impacted_principles_recall_micro": 0.8356164383561644, |
| "impacted_principles_recall_macro": 0.8099590558905407, |
| "impacted_principles_f1_micro": 0.7204724409448819, |
| "impacted_principles_f1_macro": 0.7112060969947187, |
| "remediation_actions_precision_micro": 0.5207756232686981, |
| "remediation_actions_precision_macro": 0.5043151896069353, |
| "remediation_actions_recall_micro": 0.8355555555555556, |
| "remediation_actions_recall_macro": 0.7390617197643065, |
| "remediation_actions_f1_micro": 0.6416382252559728, |
| "remediation_actions_f1_macro": 0.5815208256530966, |
| "content_type_accuracy": 1.0, |
| "content_type_precision_macro": 1.0, |
| "content_type_recall_macro": 1.0, |
| "content_type_f1_macro": 1.0, |
| "audience_segment_accuracy": 1.0, |
| "audience_segment_precision_macro": 1.0, |
| "audience_segment_recall_macro": 1.0, |
| "audience_segment_f1_macro": 1.0, |
| "detection_difficulty_accuracy": 0.41333333333333333, |
| "detection_difficulty_precision_macro": 0.39879147137211657, |
| "detection_difficulty_recall_macro": 0.40982905982905987, |
| "detection_difficulty_f1_macro": 0.4012269618676941, |
| "aggravating_factors_precision_micro": 0.5627009646302251, |
| "aggravating_factors_precision_macro": 0.5604147213071698, |
| "aggravating_factors_recall_micro": 0.7446808510638298, |
| "aggravating_factors_recall_macro": 0.7464349207339351, |
| "aggravating_factors_f1_micro": 0.6410256410256411, |
| "aggravating_factors_f1_macro": 0.6206138817554251, |
| "stage_a_selection_score": 0.7407038445266995, |
| "selection_score": 0.7436551178025155, |
| "scenario_key_count": 150, |
| "rows_per_scenario_min": 1, |
| "rows_per_scenario_median": 1.0, |
| "rows_per_scenario_max": 1, |
| "violation_accuracy_scenario_macro": 0.9866666666666667, |
| "violation_accuracy_scenario_macro_risky": 0.9929577464788732, |
| "violation_accuracy_scenario_macro_clean": 0.875, |
| "violation_accuracy_scenario_min": 0.0, |
| "violation_worst_scenario_key": "train_1371", |
| "violation_worst_scenario_label": "risky", |
| "cycle": 3, |
| "best_epoch": 23, |
| "epochs_ran": 29, |
| "lr": 0.0005, |
| "head_dropout": 0.1, |
| "weight_decay": 0.02, |
| "cycle_seconds": 27.33 |
| } |
| ], |
| "best_cycle": { |
| "loss": 11.207931518554688, |
| "violation_accuracy": 0.9933333333333333, |
| "violation_precision": 1.0, |
| "violation_recall": 0.9929577464788732, |
| "violation_f1": 0.9964664310954063, |
| "severity_accuracy": 0.7133333333333334, |
| "severity_precision_macro": 0.5736714975845411, |
| "severity_recall_macro": 0.5810399159663866, |
| "severity_f1_macro": 0.577203237410072, |
| "domain_accuracy": 0.8733333333333333, |
| "domain_precision_macro": 0.9152304502304504, |
| "domain_recall_macro": 0.9037037037037038, |
| "domain_f1_macro": 0.8981829715276235, |
| "subtype_accuracy": 0.82, |
| "subtype_precision_macro": 0.8295979273252001, |
| "subtype_recall_macro": 0.8100452577725306, |
| "subtype_f1_macro": 0.8046637752590468, |
| "jurisdiction_accuracy": 0.6933333333333334, |
| "jurisdiction_precision_macro": 0.41350649350649354, |
| "jurisdiction_recall_macro": 0.4179220779220779, |
| "jurisdiction_f1_macro": 0.4076005906238464, |
| "why_precision_micro": 0.6041666666666666, |
| "why_precision_macro": 0.5994839193351778, |
| "why_recall_micro": 0.7718631178707225, |
| "why_recall_macro": 0.7338144761221683, |
| "why_f1_micro": 0.67779632721202, |
| "why_f1_macro": 0.6489732285249087, |
| "impacted_principles_precision_micro": 0.7204724409448819, |
| "impacted_principles_precision_macro": 0.7509759521524227, |
| "impacted_principles_recall_micro": 0.8356164383561644, |
| "impacted_principles_recall_macro": 0.7889626527134941, |
| "impacted_principles_f1_micro": 0.773784355179704, |
| "impacted_principles_f1_macro": 0.7575593568585874, |
| "remediation_actions_precision_micro": 0.6105263157894737, |
| "remediation_actions_precision_macro": 0.5976390453783973, |
| "remediation_actions_recall_micro": 0.7733333333333333, |
| "remediation_actions_recall_macro": 0.690795299444056, |
| "remediation_actions_f1_micro": 0.6823529411764706, |
| "remediation_actions_f1_macro": 0.6264413385705756, |
| "content_type_accuracy": 1.0, |
| "content_type_precision_macro": 1.0, |
| "content_type_recall_macro": 1.0, |
| "content_type_f1_macro": 1.0, |
| "audience_segment_accuracy": 1.0, |
| "audience_segment_precision_macro": 1.0, |
| "audience_segment_recall_macro": 1.0, |
| "audience_segment_f1_macro": 1.0, |
| "detection_difficulty_accuracy": 0.41333333333333333, |
| "detection_difficulty_precision_macro": 0.4076248313090418, |
| "detection_difficulty_recall_macro": 0.4146464646464647, |
| "detection_difficulty_f1_macro": 0.41032213795594075, |
| "aggravating_factors_precision_micro": 0.6544715447154471, |
| "aggravating_factors_precision_macro": 0.6429940120546376, |
| "aggravating_factors_recall_micro": 0.6851063829787234, |
| "aggravating_factors_recall_macro": 0.6755889259203152, |
| "aggravating_factors_f1_micro": 0.6694386694386694, |
| "aggravating_factors_f1_macro": 0.6555896631040743, |
| "stage_a_selection_score": 0.767369221062852, |
| "selection_score": 0.7671909669032824, |
| "scenario_key_count": 150, |
| "rows_per_scenario_min": 1, |
| "rows_per_scenario_median": 1.0, |
| "rows_per_scenario_max": 1, |
| "violation_accuracy_scenario_macro": 0.9933333333333333, |
| "violation_accuracy_scenario_macro_risky": 0.9929577464788732, |
| "violation_accuracy_scenario_macro_clean": 1.0, |
| "violation_accuracy_scenario_min": 0.0, |
| "violation_worst_scenario_key": "train_1371", |
| "violation_worst_scenario_label": "risky", |
| "cycle": 2, |
| "best_epoch": 28, |
| "epochs_ran": 34, |
| "lr": 0.001, |
| "head_dropout": 0.1, |
| "weight_decay": 0.0, |
| "cycle_seconds": 32.74 |
| }, |
| "train": { |
| "loss": 0.29129520431160927, |
| "violation_accuracy": 1.0, |
| "violation_precision": 1.0, |
| "violation_recall": 1.0, |
| "violation_f1": 1.0, |
| "severity_accuracy": 0.9733333333333334, |
| "severity_precision_macro": 0.979737423027768, |
| "severity_recall_macro": 0.9795454017784588, |
| "severity_f1_macro": 0.9795522630686699, |
| "domain_accuracy": 0.9911111111111112, |
| "domain_precision_macro": 0.9922987117552334, |
| "domain_recall_macro": 0.9966329966329965, |
| "domain_f1_macro": 0.9943418090318922, |
| "subtype_accuracy": 1.0, |
| "subtype_precision_macro": 1.0, |
| "subtype_recall_macro": 1.0, |
| "subtype_f1_macro": 1.0, |
| "jurisdiction_accuracy": 0.9711111111111111, |
| "jurisdiction_precision_macro": 0.8754392733703078, |
| "jurisdiction_recall_macro": 0.9931847968545217, |
| "jurisdiction_f1_macro": 0.9279309415166559, |
| "why_precision_micro": 0.7692307692307693, |
| "why_precision_macro": 0.7629469993662027, |
| "why_recall_micro": 1.0, |
| "why_recall_macro": 0.9487179487179487, |
| "why_f1_micro": 0.8695652173913044, |
| "why_f1_macro": 0.8392609851418487, |
| "impacted_principles_precision_micro": 0.9257028112449799, |
| "impacted_principles_precision_macro": 0.8949360744911738, |
| "impacted_principles_recall_micro": 0.9913978494623656, |
| "impacted_principles_recall_macro": 0.9304394224733208, |
| "impacted_principles_f1_micro": 0.9574247144340603, |
| "impacted_principles_f1_macro": 0.9116715954228238, |
| "remediation_actions_precision_micro": 0.8031128404669261, |
| "remediation_actions_precision_macro": 0.743125500508932, |
| "remediation_actions_recall_micro": 0.9990319457889641, |
| "remediation_actions_recall_macro": 0.9029428409734013, |
| "remediation_actions_f1_micro": 0.8904227782571181, |
| "remediation_actions_f1_macro": 0.8109253028549341, |
| "content_type_accuracy": 1.0, |
| "content_type_precision_macro": 1.0, |
| "content_type_recall_macro": 1.0, |
| "content_type_f1_macro": 1.0, |
| "audience_segment_accuracy": 1.0, |
| "audience_segment_precision_macro": 1.0, |
| "audience_segment_recall_macro": 1.0, |
| "audience_segment_f1_macro": 1.0, |
| "detection_difficulty_accuracy": 0.9944444444444445, |
| "detection_difficulty_precision_macro": 0.9945552657437111, |
| "detection_difficulty_recall_macro": 0.9945552657437111, |
| "detection_difficulty_f1_macro": 0.9945552657437111, |
| "aggravating_factors_precision_micro": 0.9263862332695985, |
| "aggravating_factors_precision_macro": 0.9236474949570554, |
| "aggravating_factors_recall_micro": 0.9979402677651905, |
| "aggravating_factors_recall_macro": 0.9992537313432837, |
| "aggravating_factors_f1_micro": 0.9608329201784829, |
| "aggravating_factors_f1_macro": 0.9592327500257608, |
| "stage_a_selection_score": 0.937580517111993, |
| "selection_score": 0.947753814478068, |
| "scenario_key_count": 900, |
| "rows_per_scenario_min": 1, |
| "rows_per_scenario_median": 1.0, |
| "rows_per_scenario_max": 1, |
| "violation_accuracy_scenario_macro": 1.0, |
| "violation_accuracy_scenario_macro_risky": 1.0, |
| "violation_accuracy_scenario_macro_clean": 1.0, |
| "violation_accuracy_scenario_min": 1.0, |
| "violation_worst_scenario_key": "train_1001", |
| "violation_worst_scenario_label": "risky" |
| }, |
| "dev": { |
| "loss": 11.207931518554688, |
| "violation_accuracy": 0.9933333333333333, |
| "violation_precision": 1.0, |
| "violation_recall": 0.9929577464788732, |
| "violation_f1": 0.9964664310954063, |
| "severity_accuracy": 0.7133333333333334, |
| "severity_precision_macro": 0.5736714975845411, |
| "severity_recall_macro": 0.5810399159663866, |
| "severity_f1_macro": 0.577203237410072, |
| "domain_accuracy": 0.8733333333333333, |
| "domain_precision_macro": 0.9152304502304504, |
| "domain_recall_macro": 0.9037037037037038, |
| "domain_f1_macro": 0.8981829715276235, |
| "subtype_accuracy": 0.82, |
| "subtype_precision_macro": 0.8295979273252001, |
| "subtype_recall_macro": 0.8100452577725306, |
| "subtype_f1_macro": 0.8046637752590468, |
| "jurisdiction_accuracy": 0.6933333333333334, |
| "jurisdiction_precision_macro": 0.41350649350649354, |
| "jurisdiction_recall_macro": 0.4179220779220779, |
| "jurisdiction_f1_macro": 0.4076005906238464, |
| "why_precision_micro": 0.616822429906542, |
| "why_precision_macro": 0.6160081633765844, |
| "why_recall_micro": 0.752851711026616, |
| "why_recall_macro": 0.7186333609410531, |
| "why_f1_micro": 0.678082191780822, |
| "why_f1_macro": 0.6517414247029207, |
| "impacted_principles_precision_micro": 0.7631578947368421, |
| "impacted_principles_precision_macro": 0.7874420024420025, |
| "impacted_principles_recall_micro": 0.7945205479452054, |
| "impacted_principles_recall_macro": 0.7614157289194307, |
| "impacted_principles_f1_micro": 0.7785234899328859, |
| "impacted_principles_f1_macro": 0.7660467655075498, |
| "remediation_actions_precision_micro": 0.6105263157894737, |
| "remediation_actions_precision_macro": 0.5976390453783973, |
| "remediation_actions_recall_micro": 0.7733333333333333, |
| "remediation_actions_recall_macro": 0.690795299444056, |
| "remediation_actions_f1_micro": 0.6823529411764706, |
| "remediation_actions_f1_macro": 0.6264413385705756, |
| "content_type_accuracy": 1.0, |
| "content_type_precision_macro": 1.0, |
| "content_type_recall_macro": 1.0, |
| "content_type_f1_macro": 1.0, |
| "audience_segment_accuracy": 1.0, |
| "audience_segment_precision_macro": 1.0, |
| "audience_segment_recall_macro": 1.0, |
| "audience_segment_f1_macro": 1.0, |
| "detection_difficulty_accuracy": 0.41333333333333333, |
| "detection_difficulty_precision_macro": 0.4076248313090418, |
| "detection_difficulty_recall_macro": 0.4146464646464647, |
| "detection_difficulty_f1_macro": 0.41032213795594075, |
| "aggravating_factors_precision_micro": 0.6404494382022472, |
| "aggravating_factors_precision_macro": 0.6351122397339503, |
| "aggravating_factors_recall_micro": 0.7276595744680852, |
| "aggravating_factors_recall_macro": 0.7164210015443564, |
| "aggravating_factors_f1_micro": 0.6812749003984064, |
| "aggravating_factors_f1_macro": 0.6705742793431082, |
| "stage_a_selection_score": 0.7687761716662238, |
| "selection_score": 0.7690657581979315, |
| "scenario_key_count": 150, |
| "rows_per_scenario_min": 1, |
| "rows_per_scenario_median": 1.0, |
| "rows_per_scenario_max": 1, |
| "violation_accuracy_scenario_macro": 0.9933333333333333, |
| "violation_accuracy_scenario_macro_risky": 0.9929577464788732, |
| "violation_accuracy_scenario_macro_clean": 1.0, |
| "violation_accuracy_scenario_min": 0.0, |
| "violation_worst_scenario_key": "train_1371", |
| "violation_worst_scenario_label": "risky" |
| }, |
| "test": { |
| "loss": 10.207207107543946, |
| "violation_accuracy": 0.9866666666666667, |
| "violation_precision": 1.0, |
| "violation_recall": 0.9859154929577465, |
| "violation_f1": 0.9929078014184397, |
| "severity_accuracy": 0.7266666666666667, |
| "severity_precision_macro": 0.7056742540613509, |
| "severity_recall_macro": 0.6917853651724619, |
| "severity_f1_macro": 0.6937461494861875, |
| "domain_accuracy": 0.82, |
| "domain_precision_macro": 0.8639371000239372, |
| "domain_recall_macro": 0.7870126705653021, |
| "domain_f1_macro": 0.8032142065328451, |
| "subtype_accuracy": 0.7733333333333333, |
| "subtype_precision_macro": 0.7708825265643447, |
| "subtype_recall_macro": 0.7368260527351436, |
| "subtype_f1_macro": 0.7383595011385061, |
| "jurisdiction_accuracy": 0.74, |
| "jurisdiction_precision_macro": 0.5511805026656511, |
| "jurisdiction_recall_macro": 0.5755799755799755, |
| "jurisdiction_f1_macro": 0.5608646466716769, |
| "why_precision_micro": 0.6408045977011494, |
| "why_precision_macro": 0.6228897802851919, |
| "why_recall_micro": 0.8228782287822878, |
| "why_recall_macro": 0.7797228098698687, |
| "why_f1_micro": 0.7205169628432957, |
| "why_f1_macro": 0.6837887640406874, |
| "impacted_principles_precision_micro": 0.7368421052631579, |
| "impacted_principles_precision_macro": 0.7691853878810401, |
| "impacted_principles_recall_micro": 0.7636363636363637, |
| "impacted_principles_recall_macro": 0.6710974322869485, |
| "impacted_principles_f1_micro": 0.7499999999999999, |
| "impacted_principles_f1_macro": 0.7030370589130892, |
| "remediation_actions_precision_micro": 0.6188811188811189, |
| "remediation_actions_precision_macro": 0.5923653065256482, |
| "remediation_actions_recall_micro": 0.7695652173913043, |
| "remediation_actions_recall_macro": 0.684497765569872, |
| "remediation_actions_f1_micro": 0.686046511627907, |
| "remediation_actions_f1_macro": 0.6175714466344578, |
| "content_type_accuracy": 1.0, |
| "content_type_precision_macro": 1.0, |
| "content_type_recall_macro": 1.0, |
| "content_type_f1_macro": 1.0, |
| "audience_segment_accuracy": 1.0, |
| "audience_segment_precision_macro": 1.0, |
| "audience_segment_recall_macro": 1.0, |
| "audience_segment_f1_macro": 1.0, |
| "detection_difficulty_accuracy": 0.47333333333333333, |
| "detection_difficulty_precision_macro": 0.46757744378508614, |
| "detection_difficulty_recall_macro": 0.471182412358883, |
| "detection_difficulty_f1_macro": 0.46490073858516184, |
| "aggravating_factors_precision_micro": 0.6641509433962264, |
| "aggravating_factors_precision_macro": 0.6283313196161129, |
| "aggravating_factors_recall_micro": 0.7333333333333333, |
| "aggravating_factors_recall_macro": 0.6949052211781471, |
| "aggravating_factors_f1_micro": 0.697029702970297, |
| "aggravating_factors_f1_macro": 0.6546016914120363, |
| "stage_a_selection_score": 0.7506931806680867, |
| "selection_score": 0.7565296660343293, |
| "scenario_key_count": 150, |
| "rows_per_scenario_min": 1, |
| "rows_per_scenario_median": 1.0, |
| "rows_per_scenario_max": 1, |
| "violation_accuracy_scenario_macro": 0.9866666666666667, |
| "violation_accuracy_scenario_macro_risky": 0.9859154929577465, |
| "violation_accuracy_scenario_macro_clean": 1.0, |
| "violation_accuracy_scenario_min": 0.0, |
| "violation_worst_scenario_key": "train_1843", |
| "violation_worst_scenario_label": "risky" |
| }, |
| "thresholds": { |
| "violation": 0.5, |
| "why": 0.55, |
| "impacted_principles": 0.7, |
| "remediation_actions": 0.5, |
| "aggravating_factors": 0.4 |
| }, |
| "log_path": "_cache/logs/legacy/stage-a-grid-v3-gpu/raw/260424_135746_sentinel-mb-c-d11.log", |
| "prior_poc_inflation_factors": [ |
| "The previous PoC reused the same 17 synthetic families across train, dev, and test, so the model mostly learned family signatures rather than broad compliance reasoning.", |
| "Every prior observation carried extra structural cues such as source metadata, evidence snippets, and explicit jurisdiction sentences appended to the text.", |
| "A later dataset refactor silently dropped jurisdiction, impacted-principle, and remediation heads, which made the reported Stage A contract narrower than the product actually promises.", |
| "Reported micro metrics on dense negative label maps made performance look cleaner than a realistic class-by-class review would suggest." |
| ], |
| "mitigations": [ |
| "The data pipeline now uses a 150-row agent-authored pilot plus a hard human-review gate before any 1000/100/100 release split is allowed to exist on disk.", |
| "The generation workflow now keeps Python limited to validation, formatting, duplicate review, and statistics while the agent authors and labels each observation directly.", |
| "The encoder default still uses a 512-token window, which comfortably covers the current 1000-character manual-authoring ceiling.", |
| "The full Stage A diagnose/prescribe contract is restored in both dataset and model outputs: jurisdiction, why, impacted principles, remediation actions, detection difficulty, and aggravating factors are all explicit.", |
| "Dataset generation now validates the mock contract keys directly and requires a human-reviewed approval hash before contract changes can pass validation.", |
| "The model factory now constructs full model bundles, while checkpoints store the trained projection and heads plus the frozen encoder reference instead of duplicating immutable backbone weights.", |
| "Evaluation artifacts now report scenario-family macro violation metrics and worst-family binary performance so repeated rows inside a narrow split cannot hide behind a flattering row-average alone.", |
| "Cross-checkpoint comparison artifacts are only kept when they are refreshed against the current dataset, preventing stale benchmark reports from masquerading as current evidence." |
| ], |
| "artifact_format": "checkpoint_only", |
| "end_to_end_serialized": false, |
| "transformers_bundle_dir": null, |
| "checkpoint_dir": "_models/stage-a-grid-v3-gpu/sentinel-mb-c-d11/260424_135913_sentinel-mb-c-d11", |
| "display_name": "sentinel-mb-c-d11@260424_135913" |
| } |
|
|