TS-TinyVerifier-v0 / learned_candidate_model_stress_report.json
BoggersTheFish's picture
Add TS-Reasoner v2.0.0 learned candidate model artifact
f976dab verified
Raw
History Blame Contribute Delete
97.1 kB
{
"case_count": 4,
"dataset": "data/learned_candidate_model_stress.jsonl",
"metrics": {
"abstention_accuracy": 1.0,
"accepted_candidate_support_rate": 1.0,
"bad_candidate_rejection_rate": 1.0,
"candidate_graph_contamination_count": 0,
"candidate_ranking_accuracy": 1.0,
"channel_activation_accuracy": 0.9886,
"deeper_chain_success_rate": 1.0,
"distractor_robustness": 1.0,
"resolver_prediction_accuracy": 1.0,
"trace_schema_validity": 1.0,
"verifier_beats_model_confidence_rate": 1.0
},
"results": [
{
"abstention_checks": [],
"accepted_support_checks": [
true
],
"bad_rejection_checks": [
true,
true
],
"candidate_graph_contamination_count": 0,
"candidate_ranking_correct": true,
"case_id": "stress_high_confidence_wrong",
"channel_activation_checks": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true
],
"deeper_chain_checks": [
true
],
"is_distractor": false,
"resolver_prediction_checks": [
true,
true,
true
],
"scored_candidates": [
{
"candidate_id": "valid_m_p",
"claim": "All M are P",
"confidence": 0.42,
"features": {
"accepted_relation_candidate": 1.0,
"bias": 1.0,
"candidate_confidence": 0.42,
"candidate_quantifier_all": 1.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 1.0,
"direct_support": 0.0,
"has_distractor": 0.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 1.0,
"premise_count": 3.0,
"reverse_path": 0.0,
"some_to_all_risk": 0.0,
"support_depth": 3.0,
"transitive_support": 1.0,
"unsupported_relation_candidate": 0.0
},
"prediction": {
"channels": [
"logic_transitivity"
],
"model_confidence": 0.7622,
"ranking_score": 1.1648,
"resolver": "accept_transitive",
"status": "accepted"
},
"raw_output": "All M are P",
"source": "learned_candidate_dataset"
},
{
"candidate_id": "bad_identity_m_p",
"claim": "M equals P",
"confidence": 0.98,
"features": {
"accepted_relation_candidate": 0.0,
"bias": 1.0,
"candidate_confidence": 0.98,
"candidate_quantifier_all": 0.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 1.0,
"direct_support": 0.0,
"has_distractor": 0.0,
"identity_candidate": 1.0,
"identity_path_exists": 1.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 0.0,
"premise_count": 3.0,
"reverse_path": 0.0,
"some_to_all_risk": 0.0,
"support_depth": 0.0,
"transitive_support": 0.0,
"unsupported_relation_candidate": 0.0
},
"prediction": {
"channels": [
"identity_preservation"
],
"model_confidence": 0.0475,
"ranking_score": -2.9988,
"resolver": "reject_identity",
"status": "rejected"
},
"raw_output": "M equals P",
"source": "learned_candidate_dataset"
},
{
"candidate_id": "bad_reverse_p_m",
"claim": "All P are M",
"confidence": 0.99,
"features": {
"accepted_relation_candidate": 0.0,
"bias": 1.0,
"candidate_confidence": 0.99,
"candidate_quantifier_all": 1.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 1.0,
"direct_support": 0.0,
"has_distractor": 0.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 1.0,
"premise_count": 3.0,
"reverse_path": 1.0,
"some_to_all_risk": 0.0,
"support_depth": 0.0,
"transitive_support": 0.0,
"unsupported_relation_candidate": 0.0
},
"prediction": {
"channels": [
"directionality"
],
"model_confidence": 0.0171,
"ranking_score": -4.0544,
"resolver": "reject_reverse",
"status": "rejected"
},
"raw_output": "All P are M",
"source": "learned_candidate_dataset"
}
],
"tags": [
"deeper_chain",
"high_confidence_wrong"
],
"top_candidate_id": "valid_m_p",
"trace_schema_valid": true,
"verification": {
"abstained": [],
"accepted": [
"All M are P"
],
"candidate_results": [
{
"candidate_id": "valid_m_p",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"identity_preservation": {
"activated": true,
"details": {
"event": {
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"M!=P"
],
"status": "resolved",
"target": "M!=P",
"tension_delta": -1.0
},
"protects_against": "relation_identity_collapse"
},
"evidence": [
"('M', 'P')"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "preserved_distinct_nodes"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"M->N->O",
"N->O->P",
"M->N->P"
],
"status": "resolved",
"target": "M->O, N->P, M->P",
"tension_delta": -3.0
},
"missing_inferences": 0
},
"evidence": [
"M->N->O",
"N->O->P"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 6
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"logic_transitivity": "accepted inferred edge"
},
"claim": "All M are P",
"confidence": 0.7622,
"provenance": {
"candidate_id": "valid_m_p",
"confidence": 0.7622,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.42,
"model_prediction": {
"channels": [
"logic_transitivity"
],
"model_confidence": 0.7622,
"ranking_score": 1.1648,
"resolver": "accept_transitive",
"status": "accepted"
}
},
"raw_output": "All M are P",
"source": "learned_candidate_model"
},
"reason": "candidate is supported by a typed transitive inference",
"source": "learned_candidate_model",
"status": "accepted",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [],
"blocked_equalities": [
"M!=P"
],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"M->N:all": "premise",
"M->O:all": "inferred",
"M->P:all": "inferred",
"N->O:all": "premise",
"N->P:all": "inferred",
"O->P:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"M->N->O",
"N->O->P",
"M->N->P"
],
"status": "resolved",
"target": "M->O, N->P, M->P",
"tension_delta": -3.0
},
{
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"M!=P"
],
"status": "resolved",
"target": "M!=P",
"tension_delta": -1.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 6
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
},
{
"candidate_id": "bad_identity_m_p",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"identity_preservation": {
"activated": true,
"details": {
"event": {
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"M!=P"
],
"status": "resolved",
"target": "M!=P",
"tension_delta": -1.0
},
"protects_against": "relation_identity_collapse"
},
"evidence": [
"('M', 'P')"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "preserved_distinct_nodes"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"M->N->O",
"N->O->P",
"M->N->P"
],
"status": "resolved",
"target": "M->O, N->P, M->P",
"tension_delta": -3.0
},
"missing_inferences": 0
},
"evidence": [
"M->N->O",
"N->O->P"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 6
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"identity_preservation": "blocked identity collapse"
},
"claim": "M equals P",
"confidence": 0.0475,
"provenance": {
"candidate_id": "bad_identity_m_p",
"confidence": 0.0475,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.98,
"model_prediction": {
"channels": [
"identity_preservation"
],
"model_confidence": 0.0475,
"ranking_score": -2.9988,
"resolver": "reject_identity",
"status": "rejected"
}
},
"raw_output": "M equals P",
"source": "learned_candidate_model"
},
"reason": "candidate collapses distinct graph nodes",
"source": "learned_candidate_model",
"status": "rejected",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [],
"blocked_equalities": [
"M!=P"
],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"M->N:all": "premise",
"M->O:all": "inferred",
"M->P:all": "inferred",
"N->O:all": "premise",
"N->P:all": "inferred",
"O->P:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"M->N->O",
"N->O->P",
"M->N->P"
],
"status": "resolved",
"target": "M->O, N->P, M->P",
"tension_delta": -3.0
},
{
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"M!=P"
],
"status": "resolved",
"target": "M!=P",
"tension_delta": -1.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 6
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
},
{
"candidate_id": "bad_reverse_p_m",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": true,
"details": {
"event": {
"action": "blocked_reverse_inference",
"channel": "directionality",
"details": {},
"evidence": [
"P->M"
],
"status": "resolved",
"target": "P->M",
"tension_delta": -1.0
},
"protects_against": "converse_fallacy"
},
"evidence": [
"P->M"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "blocked_reverse_inference"
},
"identity_preservation": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"M->N->O",
"N->O->P",
"M->N->P"
],
"status": "resolved",
"target": "M->O, N->P, M->P",
"tension_delta": -3.0
},
"missing_inferences": 0
},
"evidence": [
"M->N->O",
"N->O->P"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 6
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"directionality": "blocked reverse inference"
},
"claim": "All P are M",
"confidence": 0.0171,
"provenance": {
"candidate_id": "bad_reverse_p_m",
"confidence": 0.0171,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.99,
"model_prediction": {
"channels": [
"directionality"
],
"model_confidence": 0.0171,
"ranking_score": -4.0544,
"resolver": "reject_reverse",
"status": "rejected"
}
},
"raw_output": "All P are M",
"source": "learned_candidate_model"
},
"reason": "candidate reverses a directed support path",
"source": "learned_candidate_model",
"status": "rejected",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [
"P->M"
],
"blocked_equalities": [],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"M->N:all": "premise",
"M->O:all": "inferred",
"M->P:all": "inferred",
"N->O:all": "premise",
"N->P:all": "inferred",
"O->P:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"M->N->O",
"N->O->P",
"M->N->P"
],
"status": "resolved",
"target": "M->O, N->P, M->P",
"tension_delta": -3.0
},
{
"action": "blocked_reverse_inference",
"channel": "directionality",
"details": {},
"evidence": [
"P->M"
],
"status": "resolved",
"target": "P->M",
"tension_delta": -1.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 6
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
}
],
"channels": {
"directionality": "blocked reverse inference",
"identity_preservation": "blocked identity collapse",
"logic_transitivity": "accepted inferred edge"
},
"rejected": [
"M equals P",
"All P are M"
]
},
"verifier_beats_confidence_checks": [
true,
true
]
},
{
"abstention_checks": [],
"accepted_support_checks": [
true
],
"bad_rejection_checks": [
true,
true
],
"candidate_graph_contamination_count": 0,
"candidate_ranking_correct": true,
"case_id": "stress_malformed_and_contradictory",
"channel_activation_checks": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true
],
"deeper_chain_checks": [],
"is_distractor": false,
"resolver_prediction_checks": [
true,
true,
true
],
"scored_candidates": [
{
"candidate_id": "valid_cats_animals",
"claim": "All cats are animals",
"confidence": 0.53,
"features": {
"accepted_relation_candidate": 1.0,
"bias": 1.0,
"candidate_confidence": 0.53,
"candidate_quantifier_all": 1.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 0.0,
"direct_support": 0.0,
"has_distractor": 0.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 1.0,
"premise_count": 2.0,
"reverse_path": 0.0,
"some_to_all_risk": 0.0,
"support_depth": 2.0,
"transitive_support": 1.0,
"unsupported_relation_candidate": 0.0
},
"prediction": {
"channels": [
"logic_transitivity"
],
"model_confidence": 0.6799,
"ranking_score": 0.7532,
"resolver": "accept_transitive",
"status": "accepted"
},
"raw_output": "All cats are animals",
"source": "learned_candidate_dataset"
},
{
"candidate_id": "bad_no_cats_animals",
"claim": "No cats are animals",
"confidence": 0.9,
"features": {
"accepted_relation_candidate": 0.0,
"bias": 1.0,
"candidate_confidence": 0.9,
"candidate_quantifier_all": 0.0,
"candidate_quantifier_no": 1.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 0.0,
"direct_support": 0.0,
"has_distractor": 0.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 1.0,
"parseable_relation": 1.0,
"premise_count": 2.0,
"reverse_path": 0.0,
"some_to_all_risk": 0.0,
"support_depth": 2.0,
"transitive_support": 1.0,
"unsupported_relation_candidate": 0.0
},
"prediction": {
"channels": [
"contradiction"
],
"model_confidence": 0.2986,
"ranking_score": -0.854,
"resolver": "reject_contradiction",
"status": "rejected"
},
"raw_output": "No cats are animals",
"source": "learned_candidate_dataset"
},
{
"candidate_id": "malformed_cats",
"claim": "cats somehow animalish",
"confidence": 0.88,
"features": {
"accepted_relation_candidate": 0.0,
"bias": 1.0,
"candidate_confidence": 0.88,
"candidate_quantifier_all": 0.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 0.0,
"direct_support": 0.0,
"has_distractor": 0.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 1.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 0.0,
"premise_count": 2.0,
"reverse_path": 0.0,
"some_to_all_risk": 0.0,
"support_depth": 0.0,
"transitive_support": 0.0,
"unsupported_relation_candidate": 0.0
},
"prediction": {
"channels": [
"malformed_relation"
],
"model_confidence": 0.096,
"ranking_score": -2.2428,
"resolver": "reject_malformed",
"status": "rejected"
},
"raw_output": "cats somehow animalish",
"source": "learned_candidate_dataset"
}
],
"tags": [
"malformed",
"contradiction",
"high_confidence_wrong"
],
"top_candidate_id": "valid_cats_animals",
"trace_schema_valid": true,
"verification": {
"abstained": [],
"accepted": [
"All cats are animals"
],
"candidate_results": [
{
"candidate_id": "valid_cats_animals",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"identity_preservation": {
"activated": true,
"details": {
"event": {
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"cats!=animals"
],
"status": "resolved",
"target": "cats!=animals",
"tension_delta": -1.0
},
"protects_against": "relation_identity_collapse"
},
"evidence": [
"('cats', 'animals')"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "preserved_distinct_nodes"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"cats->mammals->animals"
],
"status": "resolved",
"target": "cats->animals",
"tension_delta": -1.0
},
"missing_inferences": 0
},
"evidence": [
"cats->mammals->animals"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 3
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"logic_transitivity": "accepted inferred edge"
},
"claim": "All cats are animals",
"confidence": 0.6799,
"provenance": {
"candidate_id": "valid_cats_animals",
"confidence": 0.6799,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.53,
"model_prediction": {
"channels": [
"logic_transitivity"
],
"model_confidence": 0.6799,
"ranking_score": 0.7532,
"resolver": "accept_transitive",
"status": "accepted"
}
},
"raw_output": "All cats are animals",
"source": "learned_candidate_model"
},
"reason": "candidate is supported by a typed transitive inference",
"source": "learned_candidate_model",
"status": "accepted",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [],
"blocked_equalities": [
"cats!=animals"
],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"cats->animals:all": "inferred",
"cats->mammals:all": "premise",
"mammals->animals:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"cats->mammals->animals"
],
"status": "resolved",
"target": "cats->animals",
"tension_delta": -1.0
},
{
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"cats!=animals"
],
"status": "resolved",
"target": "cats!=animals",
"tension_delta": -1.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 3
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
},
{
"candidate_id": "bad_no_cats_animals",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"identity_preservation": {
"activated": true,
"details": {
"event": {
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"cats!=animals"
],
"status": "resolved",
"target": "cats!=animals",
"tension_delta": -1.0
},
"protects_against": "relation_identity_collapse"
},
"evidence": [
"('cats', 'animals')"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "preserved_distinct_nodes"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"cats->mammals->animals"
],
"status": "resolved",
"target": "cats->animals",
"tension_delta": -1.0
},
"missing_inferences": 0
},
"evidence": [
"cats->mammals->animals"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 3
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"contradiction": "rejected candidate contradicts premise edge"
},
"claim": "No cats are animals",
"confidence": 0.2986,
"provenance": {
"candidate_id": "bad_no_cats_animals",
"confidence": 0.2986,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.9,
"model_prediction": {
"channels": [
"contradiction"
],
"model_confidence": 0.2986,
"ranking_score": -0.854,
"resolver": "reject_contradiction",
"status": "rejected"
}
},
"raw_output": "No cats are animals",
"source": "learned_candidate_model"
},
"reason": "candidate contradicts a premise-supported edge",
"source": "learned_candidate_model",
"status": "rejected",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [],
"blocked_equalities": [
"cats!=animals"
],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"cats->animals:all": "inferred",
"cats->mammals:all": "premise",
"mammals->animals:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"cats->mammals->animals"
],
"status": "resolved",
"target": "cats->animals",
"tension_delta": -1.0
},
{
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"cats!=animals"
],
"status": "resolved",
"target": "cats!=animals",
"tension_delta": -1.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 3
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
},
{
"candidate_id": "malformed_cats",
"channel_trace": {},
"channels": {
"malformed_relation": "rejected unparsable graph claim"
},
"claim": "cats somehow animalish",
"confidence": 0.096,
"provenance": {
"candidate_id": "malformed_cats",
"confidence": 0.096,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.88,
"model_prediction": {
"channels": [
"malformed_relation"
],
"model_confidence": 0.096,
"ranking_score": -2.2428,
"resolver": "reject_malformed",
"status": "rejected"
}
},
"raw_output": "cats somehow animalish",
"source": "learned_candidate_model"
},
"reason": "candidate claim could not be parsed as a graph relation",
"source": "learned_candidate_model",
"status": "rejected",
"typed_runtime": {
"available": false,
"settled": false
}
}
],
"channels": {
"contradiction": "rejected candidate contradicts premise edge",
"logic_transitivity": "accepted inferred edge",
"malformed_relation": "rejected unparsable graph claim"
},
"rejected": [
"No cats are animals",
"cats somehow animalish"
]
},
"verifier_beats_confidence_checks": [
true,
true
]
},
{
"abstention_checks": [
true
],
"accepted_support_checks": [
true
],
"bad_rejection_checks": [
true
],
"candidate_graph_contamination_count": 0,
"candidate_ranking_correct": true,
"case_id": "stress_distractor_chain",
"channel_activation_checks": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
false,
true,
true,
true,
true,
true
],
"deeper_chain_checks": [
true
],
"is_distractor": true,
"resolver_prediction_checks": [
true,
true,
true
],
"scored_candidates": [
{
"candidate_id": "valid_a_d_with_distractor",
"claim": "All A are D",
"confidence": 0.49,
"features": {
"accepted_relation_candidate": 1.0,
"bias": 1.0,
"candidate_confidence": 0.49,
"candidate_quantifier_all": 1.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 1.0,
"direct_support": 0.0,
"has_distractor": 1.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 1.0,
"premise_count": 5.0,
"reverse_path": 0.0,
"some_to_all_risk": 0.0,
"support_depth": 3.0,
"transitive_support": 1.0,
"unsupported_relation_candidate": 0.0
},
"prediction": {
"channels": [
"logic_transitivity"
],
"model_confidence": 0.4318,
"ranking_score": -0.2744,
"resolver": "accept_transitive",
"status": "accepted"
},
"raw_output": "All A are D",
"source": "learned_candidate_dataset"
},
{
"candidate_id": "wrong_r_d",
"claim": "All R are D",
"confidence": 0.89,
"features": {
"accepted_relation_candidate": 0.0,
"bias": 1.0,
"candidate_confidence": 0.89,
"candidate_quantifier_all": 1.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 1.0,
"direct_support": 0.0,
"has_distractor": 1.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 1.0,
"premise_count": 5.0,
"reverse_path": 0.0,
"some_to_all_risk": 0.0,
"support_depth": 0.0,
"transitive_support": 0.0,
"unsupported_relation_candidate": 1.0
},
"prediction": {
"channels": [
"typed_support"
],
"model_confidence": 0.009,
"ranking_score": -4.6984,
"resolver": "abstain_unsupported",
"status": "abstained"
},
"raw_output": "All R are D",
"source": "learned_candidate_dataset"
},
{
"candidate_id": "wrong_reverse_d_a",
"claim": "All D are A",
"confidence": 0.91,
"features": {
"accepted_relation_candidate": 0.0,
"bias": 1.0,
"candidate_confidence": 0.91,
"candidate_quantifier_all": 1.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 1.0,
"direct_support": 0.0,
"has_distractor": 1.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 1.0,
"premise_count": 5.0,
"reverse_path": 1.0,
"some_to_all_risk": 0.0,
"support_depth": 0.0,
"transitive_support": 0.0,
"unsupported_relation_candidate": 0.0
},
"prediction": {
"channels": [],
"model_confidence": 0.0045,
"ranking_score": -5.4096,
"resolver": "reject_reverse",
"status": "rejected"
},
"raw_output": "All D are A",
"source": "learned_candidate_dataset"
}
],
"tags": [
"deeper_chain",
"distractor",
"high_confidence_wrong"
],
"top_candidate_id": "valid_a_d_with_distractor",
"trace_schema_valid": true,
"verification": {
"abstained": [
"All R are D"
],
"accepted": [
"All A are D"
],
"candidate_results": [
{
"candidate_id": "valid_a_d_with_distractor",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"identity_preservation": {
"activated": true,
"details": {
"event": {
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"A!=D"
],
"status": "resolved",
"target": "A!=D",
"tension_delta": -1.0
},
"protects_against": "relation_identity_collapse"
},
"evidence": [
"('A', 'D')"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "preserved_distinct_nodes"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C",
"R->S->T",
"B->C->D",
"A->B->D"
],
"status": "resolved",
"target": "A->C, R->T, B->D, A->D",
"tension_delta": -4.0
},
"missing_inferences": 0
},
"evidence": [
"A->B->C",
"R->S->T",
"B->C->D"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 9
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"logic_transitivity": "accepted inferred edge"
},
"claim": "All A are D",
"confidence": 0.4318,
"provenance": {
"candidate_id": "valid_a_d_with_distractor",
"confidence": 0.4318,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.49,
"model_prediction": {
"channels": [
"logic_transitivity"
],
"model_confidence": 0.4318,
"ranking_score": -0.2744,
"resolver": "accept_transitive",
"status": "accepted"
}
},
"raw_output": "All A are D",
"source": "learned_candidate_model"
},
"reason": "candidate is supported by a typed transitive inference",
"source": "learned_candidate_model",
"status": "accepted",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [],
"blocked_equalities": [
"A!=D"
],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"A->B:all": "premise",
"A->C:all": "inferred",
"A->D:all": "inferred",
"B->C:all": "premise",
"B->D:all": "inferred",
"C->D:all": "premise",
"R->S:all": "premise",
"R->T:all": "inferred",
"S->T:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C",
"R->S->T",
"B->C->D",
"A->B->D"
],
"status": "resolved",
"target": "A->C, R->T, B->D, A->D",
"tension_delta": -4.0
},
{
"action": "preserved_distinct_nodes",
"channel": "identity_preservation",
"details": {},
"evidence": [
"A!=D"
],
"status": "resolved",
"target": "A!=D",
"tension_delta": -1.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 9
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
},
{
"candidate_id": "wrong_r_d",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"identity_preservation": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C",
"R->S->T",
"B->C->D",
"A->B->D"
],
"status": "resolved",
"target": "A->C, R->T, B->D, A->D",
"tension_delta": -4.0
},
"missing_inferences": 0
},
"evidence": [
"A->B->C",
"R->S->T",
"B->C->D"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 9
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"typed_support": "abstained no accepted channel support"
},
"claim": "All R are D",
"confidence": 0.009,
"provenance": {
"candidate_id": "wrong_r_d",
"confidence": 0.009,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.89,
"model_prediction": {
"channels": [
"typed_support"
],
"model_confidence": 0.009,
"ranking_score": -4.6984,
"resolver": "abstain_unsupported",
"status": "abstained"
}
},
"raw_output": "All R are D",
"source": "learned_candidate_model"
},
"reason": "no typed channel produced support or a typed rejection",
"source": "learned_candidate_model",
"status": "abstained",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [],
"blocked_equalities": [],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"A->B:all": "premise",
"A->C:all": "inferred",
"A->D:all": "inferred",
"B->C:all": "premise",
"B->D:all": "inferred",
"C->D:all": "premise",
"R->S:all": "premise",
"R->T:all": "inferred",
"S->T:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C",
"R->S->T",
"B->C->D",
"A->B->D"
],
"status": "resolved",
"target": "A->C, R->T, B->D, A->D",
"tension_delta": -4.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 9
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
},
{
"candidate_id": "wrong_reverse_d_a",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": true,
"details": {
"event": {
"action": "blocked_reverse_inference",
"channel": "directionality",
"details": {},
"evidence": [
"D->A"
],
"status": "resolved",
"target": "D->A",
"tension_delta": -1.0
},
"protects_against": "converse_fallacy"
},
"evidence": [
"D->A"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "blocked_reverse_inference"
},
"identity_preservation": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C",
"R->S->T",
"B->C->D",
"A->B->D"
],
"status": "resolved",
"target": "A->C, R->T, B->D, A->D",
"tension_delta": -4.0
},
"missing_inferences": 0
},
"evidence": [
"A->B->C",
"R->S->T",
"B->C->D"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 9
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"directionality": "blocked reverse inference"
},
"claim": "All D are A",
"confidence": 0.0045,
"provenance": {
"candidate_id": "wrong_reverse_d_a",
"confidence": 0.0045,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.91,
"model_prediction": {
"channels": [],
"model_confidence": 0.0045,
"ranking_score": -5.4096,
"resolver": "reject_reverse",
"status": "rejected"
}
},
"raw_output": "All D are A",
"source": "learned_candidate_model"
},
"reason": "candidate reverses a directed support path",
"source": "learned_candidate_model",
"status": "rejected",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [
"D->A"
],
"blocked_equalities": [],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"A->B:all": "premise",
"A->C:all": "inferred",
"A->D:all": "inferred",
"B->C:all": "premise",
"B->D:all": "inferred",
"C->D:all": "premise",
"R->S:all": "premise",
"R->T:all": "inferred",
"S->T:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C",
"R->S->T",
"B->C->D",
"A->B->D"
],
"status": "resolved",
"target": "A->C, R->T, B->D, A->D",
"tension_delta": -4.0
},
{
"action": "blocked_reverse_inference",
"channel": "directionality",
"details": {},
"evidence": [
"D->A"
],
"status": "resolved",
"target": "D->A",
"tension_delta": -1.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 9
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
}
],
"channels": {
"directionality": "blocked reverse inference",
"logic_transitivity": "accepted inferred edge",
"typed_support": "abstained no accepted channel support"
},
"rejected": [
"All D are A"
]
},
"verifier_beats_confidence_checks": [
true
]
},
{
"abstention_checks": [
true,
true
],
"accepted_support_checks": [],
"bad_rejection_checks": [],
"candidate_graph_contamination_count": 0,
"candidate_ranking_correct": true,
"case_id": "stress_unsupported_abstention",
"channel_activation_checks": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true
],
"deeper_chain_checks": [],
"is_distractor": false,
"resolver_prediction_checks": [
true,
true
],
"scored_candidates": [
{
"candidate_id": "unsupported_a_z",
"claim": "All A are Z",
"confidence": 0.86,
"features": {
"accepted_relation_candidate": 0.0,
"bias": 1.0,
"candidate_confidence": 0.86,
"candidate_quantifier_all": 1.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 0.0,
"direct_support": 0.0,
"has_distractor": 0.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 1.0,
"premise_count": 2.0,
"reverse_path": 0.0,
"some_to_all_risk": 0.0,
"support_depth": 0.0,
"transitive_support": 0.0,
"unsupported_relation_candidate": 1.0
},
"prediction": {
"channels": [
"typed_support"
],
"model_confidence": 0.0703,
"ranking_score": -2.5816,
"resolver": "abstain_unsupported",
"status": "abstained"
},
"raw_output": "All A are Z",
"source": "learned_candidate_dataset"
},
{
"candidate_id": "bad_reverse_z_a",
"claim": "All Z are A",
"confidence": 0.92,
"features": {
"accepted_relation_candidate": 0.0,
"bias": 1.0,
"candidate_confidence": 0.92,
"candidate_quantifier_all": 1.0,
"candidate_quantifier_no": 0.0,
"candidate_quantifier_some": 0.0,
"candidate_subject_eq_predicate": 0.0,
"contradiction_candidate": 0.0,
"deeper_chain_case": 0.0,
"direct_support": 0.0,
"has_distractor": 0.0,
"identity_candidate": 0.0,
"identity_path_exists": 0.0,
"malformed_candidate": 0.0,
"no_against_transitive_support": 0.0,
"parseable_relation": 1.0,
"premise_count": 2.0,
"reverse_path": 0.0,
"some_to_all_risk": 0.0,
"support_depth": 0.0,
"transitive_support": 0.0,
"unsupported_relation_candidate": 1.0
},
"prediction": {
"channels": [
"typed_support"
],
"model_confidence": 0.0682,
"ranking_score": -2.6152,
"resolver": "abstain_unsupported",
"status": "abstained"
},
"raw_output": "All Z are A",
"source": "learned_candidate_dataset"
}
],
"tags": [
"unsupported",
"high_confidence_wrong"
],
"top_candidate_id": "unsupported_a_z",
"trace_schema_valid": true,
"verification": {
"abstained": [
"All A are Z",
"All Z are A"
],
"accepted": [],
"candidate_results": [
{
"candidate_id": "unsupported_a_z",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"identity_preservation": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C"
],
"status": "resolved",
"target": "A->C",
"tension_delta": -1.0
},
"missing_inferences": 0
},
"evidence": [
"A->B->C"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 3
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"typed_support": "abstained no accepted channel support"
},
"claim": "All A are Z",
"confidence": 0.0703,
"provenance": {
"candidate_id": "unsupported_a_z",
"confidence": 0.0703,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.86,
"model_prediction": {
"channels": [
"typed_support"
],
"model_confidence": 0.0703,
"ranking_score": -2.5816,
"resolver": "abstain_unsupported",
"status": "abstained"
}
},
"raw_output": "All A are Z",
"source": "learned_candidate_model"
},
"reason": "no typed channel produced support or a typed rejection",
"source": "learned_candidate_model",
"status": "abstained",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [],
"blocked_equalities": [],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"A->B:all": "premise",
"A->C:all": "inferred",
"B->C:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C"
],
"status": "resolved",
"target": "A->C",
"tension_delta": -1.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 3
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
},
{
"candidate_id": "bad_reverse_z_a",
"channel_trace": {
"confidence_abstention": {
"activated": true,
"details": {
"decision": "answer",
"event": {
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "abstained_or_answered"
},
"contradiction": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"directionality": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"identity_preservation": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"logic_transitivity": {
"activated": true,
"details": {
"event": {
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C"
],
"status": "resolved",
"target": "A->C",
"tension_delta": -1.0
},
"missing_inferences": 0
},
"evidence": [
"A->B->C"
],
"final_tension": 0.0,
"initial_tension": 1.0,
"resolution": "added_inferred_edge"
},
"quantifier_scope": {
"activated": false,
"details": {},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "not_activated"
},
"surface_structure": {
"activated": true,
"details": {
"event": {
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 3
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
"untagged_claim_edges": 0
},
"evidence": [],
"final_tension": 0.0,
"initial_tension": 0.0,
"resolution": "tagged_premise_inferred_candidate_edges"
}
},
"channels": {
"typed_support": "abstained no accepted channel support"
},
"claim": "All Z are A",
"confidence": 0.0682,
"provenance": {
"candidate_id": "bad_reverse_z_a",
"confidence": 0.0682,
"metadata": {
"boundary": "learned model proposes/ranks; TS-Reasoner typed channels verify",
"input_candidate_confidence": 0.92,
"model_prediction": {
"channels": [
"typed_support"
],
"model_confidence": 0.0682,
"ranking_score": -2.6152,
"resolver": "abstain_unsupported",
"status": "abstained"
}
},
"raw_output": "All Z are A",
"source": "learned_candidate_model"
},
"reason": "no typed channel produced support or a typed rejection",
"source": "learned_candidate_model",
"status": "abstained",
"typed_runtime": {
"available": true,
"context": {
"abstention": false,
"blocked_edges": [],
"blocked_equalities": [],
"contradiction_flagged": false,
"quantifier_scope_blocked": false,
"surface_tags": {
"A->B:all": "premise",
"A->C:all": "inferred",
"B->C:all": "premise"
}
},
"global_tension": 0.0,
"resolver_events": [
{
"action": "added_inferred_edge",
"channel": "logic_transitivity",
"details": {},
"evidence": [
"A->B->C"
],
"status": "resolved",
"target": "A->C",
"tension_delta": -1.0
},
{
"action": "tagged_premise_inferred_candidate_edges",
"channel": "surface_structure",
"details": {
"tag_count": 3
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": -1.0
},
{
"action": "abstained_or_answered",
"channel": "confidence_abstention",
"details": {
"abstained": false
},
"evidence": [],
"status": "resolved",
"target": null,
"tension_delta": 0.0
}
],
"settled": true
}
}
],
"channels": {
"typed_support": "abstained no accepted channel support"
},
"rejected": []
},
"verifier_beats_confidence_checks": []
}
]
}