{"candidates": [{"candidate_id": "valid_a_c", "claim": "All A are C", "confidence": 0.64, "raw_output": "All A are C", "source": "learned_candidate_dataset"}, {"candidate_id": "bad_reverse_c_a", "claim": "All C are A", "confidence": 0.91, "raw_output": "All C are A", "source": "learned_candidate_dataset"}, {"candidate_id": "bad_identity_a_c", "claim": "A equals C", "confidence": 0.83, "raw_output": "A equals C", "source": "learned_candidate_dataset"}, {"candidate_id": "unsupported_a_d", "claim": "All A are D", "confidence": 0.44, "raw_output": "All A are D", "source": "learned_candidate_dataset"}], "case_id": "train_basic_transitive", "input_text": "All A are B. All B are C. Are all A C?", "labels": {"bad_identity_a_c": {"channels": ["identity_preservation"], "claim": "A equals C", "reason": "candidate collapses distinct graph nodes", "resolver": "reject_identity", "status": "rejected"}, "bad_reverse_c_a": {"channels": ["directionality"], "claim": "All C are A", "reason": "candidate reverses a directed support path", "resolver": "reject_reverse", "status": "rejected"}, "unsupported_a_d": {"channels": ["typed_support"], "claim": "All A are D", "reason": "no typed channel produced support or a typed rejection", "resolver": "abstain_unsupported", "status": "abstained"}, "valid_a_c": {"channels": ["logic_transitivity"], "claim": "All A are C", "reason": "candidate is supported by a typed transitive inference", "resolver": "accept_transitive", "status": "accepted"}}, "split": "train", "tags": ["transitive"]}
{"candidates": [{"candidate_id": "bad_all_pilots_careful", "claim": "All pilots are careful", "confidence": 0.82, "raw_output": "All pilots are careful", "source": "learned_candidate_dataset"}, {"candidate_id": "weak_some_pilots_careful", "claim": "Some pilots are careful", "confidence": 0.48, "raw_output": "Some pilots are careful", "source": "learned_candidate_dataset"}], "case_id": "train_some_to_all", "input_text": "Some pilots are engineers. All engineers are careful. Are all pilots careful?", "labels": {"bad_all_pilots_careful": {"channels": ["quantifier_scope"], "claim": "All pilots are careful", "reason": "candidate upgrades existential support into a universal claim", "resolver": "reject_quantifier", "status": "rejected"}, "weak_some_pilots_careful": {"channels": ["typed_support"], "claim": "Some pilots are careful", "reason": "no typed channel produced support or a typed rejection", "resolver": "abstain_unsupported", "status": "abstained"}}, "split": "train", "tags": ["quantifier"]}
{"candidates": [{"candidate_id": "valid_a_b", "claim": "All A are B", "confidence": 0.62, "raw_output": "All A are B", "source": "learned_candidate_dataset"}, {"candidate_id": "bad_no_a_b", "claim": "No A are B", "confidence": 0.94, "raw_output": "No A are B", "source": "learned_candidate_dataset"}], "case_id": "train_contradiction", "input_text": "All A are B. Are all A B?", "labels": {"bad_no_a_b": {"channels": ["contradiction"], "claim": "No A are B", "reason": "candidate contradicts a premise-supported edge", "resolver": "reject_contradiction", "status": "rejected"}, "valid_a_b": {"channels": ["surface_structure"], "claim": "All A are B", "reason": "candidate is directly present in the premise graph", "resolver": "accept_premise", "status": "accepted"}}, "split": "train", "tags": ["contradiction"]}
{"candidates": [{"candidate_id": "valid_a_d", "claim": "All A are D", "confidence": 0.58, "raw_output": "All A are D", "source": "learned_candidate_dataset"}, {"candidate_id": "bad_reverse_d_a", "claim": "All D are A", "confidence": 0.95, "raw_output": "All D are A", "source": "learned_candidate_dataset"}, {"candidate_id": "bad_identity_a_d", "claim": "A equals D", "confidence": 0.87, "raw_output": "A equals D", "source": "learned_candidate_dataset"}], "case_id": "train_deeper_chain", "input_text": "All A are B. All B are C. All C are D. Are all A D?", "labels": {"bad_identity_a_d": {"channels": ["identity_preservation"], "claim": "A equals D", "reason": "candidate collapses distinct graph nodes", "resolver": "reject_identity", "status": "rejected"}, "bad_reverse_d_a": {"channels": ["directionality"], "claim": "All D are A", "reason": "candidate reverses a directed support path", "resolver": "reject_reverse", "status": "rejected"}, "valid_a_d": {"channels": ["logic_transitivity"], "claim": "All A are D", "reason": "candidate is supported by a typed transitive inference", "resolver": "accept_transitive", "status": "accepted"}}, "split": "train", "tags": ["deeper_chain"]}
{"candidates": [{"candidate_id": "malformed", "claim": "A therefore C probably", "confidence": 0.77, "raw_output": "A therefore C probably", "source": "learned_candidate_dataset"}], "case_id": "train_malformed", "input_text": "All A are B. All B are C. Are all A C?", "labels": {"malformed": {"channels": ["malformed_relation"], "claim": "A therefore C probably", "reason": "candidate claim could not be parsed as a graph relation", "resolver": "reject_malformed", "status": "rejected"}}, "split": "train", "tags": ["malformed"]}
{"candidates": [{"candidate_id": "valid_cats_animals", "claim": "All cats are animals", "confidence": 0.56, "raw_output": "All cats are animals", "source": "learned_candidate_dataset"}, {"candidate_id": "bad_no_cats_animals", "claim": "No cats are animals", "confidence": 0.9, "raw_output": "No cats are animals", "source": "learned_candidate_dataset"}], "case_id": "train_no_against_transitive_support", "input_text": "All cats are mammals. All mammals are animals. Are all cats animals?", "labels": {"bad_no_cats_animals": {"channels": ["contradiction"], "claim": "No cats are animals", "reason": "candidate contradicts a premise-supported edge", "resolver": "reject_contradiction", "status": "rejected"}, "valid_cats_animals": {"channels": ["logic_transitivity"], "claim": "All cats are animals", "reason": "candidate is supported by a typed transitive inference", "resolver": "accept_transitive", "status": "accepted"}}, "split": "train", "tags": ["contradiction"]}
{"candidates": [{"candidate_id": "valid_a_c", "claim": "All A are C", "confidence": 0.57, "raw_output": "All A are C", "source": "learned_candidate_dataset"}, {"candidate_id": "unsupported_x_c", "claim": "All X are C", "confidence": 0.84, "raw_output": "All X are C", "source": "learned_candidate_dataset"}], "case_id": "train_distractor_unsupported", "input_text": "All A are B. All X are Y. All B are C. Are all A C?", "labels": {"unsupported_x_c": {"channels": ["typed_support"], "claim": "All X are C", "reason": "no typed channel produced support or a typed rejection", "resolver": "abstain_unsupported", "status": "abstained"}, "valid_a_c": {"channels": ["logic_transitivity"], "claim": "All A are C", "reason": "candidate is supported by a typed transitive inference", "resolver": "accept_transitive", "status": "accepted"}}, "split": "train", "tags": ["distractor", "high_confidence_wrong"]}