{ "task": "P1_verification_v12_fixed", "fix": "V12: Removed double-padding bug from V11. extract_hubert_features() no longer adds extra 2s padding since run_inference() already handles it.", "date": "2026-04-09T01:08:22.753687", "model": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/models/one_voice_rvc_v2_kaggle_v10.pth", "model_size_mb": 139.3, "pipeline": "edge-tts → HuBERT + pyworld → 2x interp → SynthesizerTrn [V12 FIXED]", "total_tests": 3, "successful": 3, "total_duration_sec": 29.88, "total_time_sec": 129.96, "tests": [ { "test": "greeting", "style": "Casual greeting", "text": "Hello! I am One, the very first NumberBlock! I love counting and playing with all my friends!", "source_file": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/output/p1_verification_v12_fixed/source/source_greeting.wav", "output_file": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/output/p1_verification_v12_fixed/v12_test_greeting.wav", "source_duration_sec": 9.24, "duration_sec": 9.22, "duration_ratio": 1.0, "sample_rate": 40000, "f0_up_key": 0, "f0_method": "pyworld_harvest", "pipeline": "edge-tts → HuBERT(transformers) + pyworld → F.interpolate(2x) → SynthesizerTrnMs768NSFsid [V12 FIXED single-pad]", "inference_time_sec": 34.12, "model": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/models/one_voice_rvc_v2_kaggle_v10.pth", "model_info": "RVC v2 official | 200 epochs | 6087 samples | Tesla P100-PCIE-16GB | cuda | V10", "status": "success" }, { "test": "counting", "style": "Educational counting", "text": "One, two, three, four, five! Let's count together! One plus one equals two! \nThat's math magic!", "source_file": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/output/p1_verification_v12_fixed/source/source_counting.wav", "output_file": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/output/p1_verification_v12_fixed/v12_test_counting.wav", "source_duration_sec": 11.21, "duration_sec": 11.2, "duration_ratio": 1.0, "sample_rate": 40000, "f0_up_key": 0, "f0_method": "pyworld_harvest", "pipeline": "edge-tts → HuBERT(transformers) + pyworld → F.interpolate(2x) → SynthesizerTrnMs768NSFsid [V12 FIXED single-pad]", "inference_time_sec": 36.87, "model": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/models/one_voice_rvc_v2_kaggle_v10.pth", "model_info": "RVC v2 official | 200 epochs | 6087 samples | Tesla P100-PCIE-16GB | cuda | V10", "status": "success" }, { "test": "emotional", "style": "Excited/emotional", "text": "I'm so happy today! The sun is shining and we're going to have a wonderful adventure! Let's go, everyone!", "source_file": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/output/p1_verification_v12_fixed/source/source_emotional.wav", "output_file": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/output/p1_verification_v12_fixed/v12_test_emotional.wav", "source_duration_sec": 9.48, "duration_sec": 9.46, "duration_ratio": 1.0, "sample_rate": 40000, "f0_up_key": 2, "f0_method": "pyworld_harvest", "pipeline": "edge-tts → HuBERT(transformers) + pyworld → F.interpolate(2x) → SynthesizerTrnMs768NSFsid [V12 FIXED single-pad]", "inference_time_sec": 38.07, "model": "/Users/yanfenma/.openclaw/groups/workspace-oc_90142ae290925820dcbac9717011d4af/models/one_voice_rvc_v2_kaggle_v10.pth", "model_info": "RVC v2 official | 200 epochs | 6087 samples | Tesla P100-PCIE-16GB | cuda | V10", "status": "success" } ] }