{"candidates": [{"candidate_id": "demo_valid_a_d", "claim": "All A are D", "confidence": 0.55, "raw_output": "All A are D", "source": "learned_candidate_dataset"}, {"candidate_id": "demo_bad_reverse_d_a", "claim": "All D are A", "confidence": 0.96, "raw_output": "All D are A", "source": "learned_candidate_dataset"}, {"candidate_id": "demo_bad_identity_a_d", "claim": "A equals D", "confidence": 0.88, "raw_output": "A equals D", "source": "learned_candidate_dataset"}], "case_id": "eval_deeper_grant_demo", "input_text": "All A are B. All B are C. All C are D. Are all A D?", "labels": {"demo_bad_identity_a_d": {"channels": ["identity_preservation"], "claim": "A equals D", "reason": "candidate collapses distinct graph nodes", "resolver": "reject_identity", "status": "rejected"}, "demo_bad_reverse_d_a": {"channels": ["directionality"], "claim": "All D are A", "reason": "candidate reverses a directed support path", "resolver": "reject_reverse", "status": "rejected"}, "demo_valid_a_d": {"channels": ["logic_transitivity"], "claim": "All A are D", "reason": "candidate is supported by a typed transitive inference", "resolver": "accept_transitive", "status": "accepted"}}, "split": "eval", "tags": ["deeper_chain", "grant_demo"]}
{"candidates": [{"candidate_id": "valid_a_d_distractor", "claim": "All A are D", "confidence": 0.6, "raw_output": "All A are D", "source": "learned_candidate_dataset"}, {"candidate_id": "wrong_x_d", "claim": "All X are D", "confidence": 0.8, "raw_output": "All X are D", "source": "learned_candidate_dataset"}, {"candidate_id": "bad_reverse_d_a", "claim": "All D are A", "confidence": 0.93, "raw_output": "All D are A", "source": "learned_candidate_dataset"}], "case_id": "eval_distractor", "input_text": "All A are B. All X are Y. All B are C. All C are D. Are all A D?", "labels": {"bad_reverse_d_a": {"channels": ["directionality"], "claim": "All D are A", "reason": "candidate reverses a directed support path", "resolver": "reject_reverse", "status": "rejected"}, "valid_a_d_distractor": {"channels": ["logic_transitivity"], "claim": "All A are D", "reason": "candidate is supported by a typed transitive inference", "resolver": "accept_transitive", "status": "accepted"}, "wrong_x_d": {"channels": ["typed_support"], "claim": "All X are D", "reason": "no typed channel produced support or a typed rejection", "resolver": "abstain_unsupported", "status": "abstained"}}, "split": "eval", "tags": ["deeper_chain", "distractor"]}
{"candidates": [{"candidate_id": "unsupported_a_d", "claim": "All A are D", "confidence": 0.74, "raw_output": "All A are D", "source": "learned_candidate_dataset"}, {"candidate_id": "premise_a_b", "claim": "All A are B", "confidence": 0.51, "raw_output": "All A are B", "source": "learned_candidate_dataset"}], "case_id": "eval_unsupported_leap", "input_text": "All A are B. Are all A D?", "labels": {"premise_a_b": {"channels": ["surface_structure"], "claim": "All A are B", "reason": "candidate is directly present in the premise graph", "resolver": "accept_premise", "status": "accepted"}, "unsupported_a_d": {"channels": ["typed_support"], "claim": "All A are D", "reason": "no typed channel produced support or a typed rejection", "resolver": "abstain_unsupported", "status": "abstained"}}, "split": "eval", "tags": ["unsupported"]}