{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [ { "file_id": "1K_Yg4gj70chc1PysQ_28BdoXaZS4A8jC", "timestamp": 1777461644892 } ], "gpuType": "T4", "authorship_tag": "ABX9TyODVgqfIWGZS79m60u1MP49" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-pHYWMO__949", "executionInfo": { "status": "ok", "timestamp": 1777411874741, "user_tz": 420, "elapsed": 19020, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "aa005fd5-b5d6-47a1-948b-09fff7a2da00" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "DRIVE_ROOT = '/content/drive/MyDrive/deepfake_audio'\n", "RAW_2019 = os.path.join(DRIVE_ROOT, 'data/raw/asvspoof_2019')\n", "\n", "# Check the folder we created in Phase 1 still exists\n", "if os.path.exists(RAW_2019):\n", " print(f\"\u2705 Found target folder: {RAW_2019}\")\n", " contents = os.listdir(RAW_2019)\n", " if contents:\n", " print(f\" Contents: {contents}\")\n", " else:\n", " print(f\" (folder is empty, ready for download)\")\n", "else:\n", " print(f\"\u274c Folder not found, creating it now...\")\n", " os.makedirs(RAW_2019, exist_ok=True)\n", " print(f\"\u2705 Created: {RAW_2019}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "eer9nqWfAO5G", "executionInfo": { "status": "ok", "timestamp": 1777411882809, "user_tz": 420, "elapsed": 1206, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "2f0ab280-6218-4f6a-c974-7a878b2c275f" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u2705 Found target folder: /content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019\n", " (folder is empty, ready for download)\n" ] } ] }, { "cell_type": "code", "source": [ "from google.colab import files\n", "print(\"Click 'Choose Files' and select kaggle.json from your Downloads folder\")\n", "uploaded = files.upload()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 90 }, "id": "MMlQD4aQoLHN", "executionInfo": { "status": "ok", "timestamp": 1777455936367, "user_tz": 420, "elapsed": 11370, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "b746aebb-342d-42de-ec53-6bf241bc9d39" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Click 'Choose Files' and select kaggle.json from your Downloads folder\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. Please rerun this cell to enable.\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Saving kaggle.json to kaggle.json\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "import shutil\n", "\n", "# Move kaggle.json to the standard location\n", "os.makedirs('/root/.kaggle', exist_ok=True)\n", "shutil.move('kaggle.json', '/root/.kaggle/kaggle.json')\n", "\n", "# Set strict permissions (Kaggle CLI requires this)\n", "os.chmod('/root/.kaggle/kaggle.json', 0o600)\n", "\n", "print(\"\u2705 Kaggle credentials configured.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NaOV8ecRoWlP", "executionInfo": { "status": "ok", "timestamp": 1777455954165, "user_tz": 420, "elapsed": 68, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "567375fe-5513-4d17-e835-9e5e1167228e" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u2705 Kaggle credentials configured.\n" ] } ] }, { "cell_type": "code", "source": [ "!pip install -q kaggle\n", "\n", "# Test that the API is authenticated by listing one of your datasets / competitions\n", "!kaggle datasets list -s asvspoof --max-size 100000000 2>&1 | head -20" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "BnmUkAC2oZlm", "executionInfo": { "status": "ok", "timestamp": 1777455972925, "user_tz": 420, "elapsed": 8095, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "eacef804-495b-4c90-fa6c-3293d90d2a96" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "ref title size lastUpdated downloadCount voteCount usabilityRating \n", "------------------------------------------- -------------------------- ---------- -------------------------- ------------- --------- --------------- \n", "shettysharanyaa/asvspoof-features asvspoof-features 23554403 2026-02-24 16:12:08.220000 0 0 0.1875 \n", "chandajha04/asvspoof-2021 asvspoof-2021 4225 2025-11-08 13:54:33.450000 2 0 0.25 \n", "shettysharanyaa/asvspoof-features-all asvspoof-features-all 11807951 2026-03-15 15:00:38.487000 0 0 0.0625 \n", "beosup/asvspoof-2019-la-samples ASVspoof-2019-LA-samples 249565 2026-01-15 03:06:14.567000 3 0 0.25 \n", "simontrann/asvspoof2021-la-key ASVSpoof2021_LA_Key 21237220 2025-10-09 08:22:47.333000 8 0 0.25 \n", "sukiss/asvspoof-dataset Asvspoof_dataset 5355 2025-08-02 18:57:44.890000 6 0 0.25 \n", "minhbhm/asvspoof19-results asvspoof19_results 1111274 2026-03-30 00:11:32.867000 1 0 0.0625 \n", "sjoshi2/asvspoof-la asvspoof_la 1884618 2026-04-27 06:06:49.930000 1 0 0.125 \n", "muhammedaliirmak/asvspoof19la-d5000-625-625 ASVSpoof19LA_D5000_625_625 59078 2024-09-16 16:34:56.113000 3 0 0.11764706 \n", "minhbhm/asvspoof2021-results asvspoof2021_results 4193010 2026-04-03 09:32:19.507000 1 0 0.0 \n" ] } ] }, { "cell_type": "code", "source": [ "!kaggle datasets list -s \"asvpoof 2019\" 2>&1" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "r_0kNdUQonv9", "executionInfo": { "status": "ok", "timestamp": 1777456024127, "user_tz": 420, "elapsed": 726, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "6b686da4-12d3-41d9-c50a-e6d32ec7f569" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "ref title size lastUpdated downloadCount voteCount usabilityRating \n", "------------------------------------- ------------------------ ----------- -------------------------- ------------- --------- --------------- \n", "awsaf49/asvpoof-2019-dataset ASVspoof 2019 Dataset 25321823144 2022-06-21 17:18:04.330000 13384 55 0.625 \n", "anishsarkar22/asvpoof-2019-dataset-la ASVSpoof 2019 LA Dataset 7646913333 2025-10-13 11:12:09.283000 183 2 0.4375 \n", "sukiss/asvspoof-dataset Asvspoof_dataset 5355 2025-08-02 18:57:44.890000 6 0 0.25 \n" ] } ] }, { "cell_type": "code", "source": [ "!kaggle datasets files anishsarkar22/asvpoof-2019-dataset-la 2>&1 | head -30" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Q82yRDMyo0FG", "executionInfo": { "status": "ok", "timestamp": 1777456074403, "user_tz": 420, "elapsed": 821, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "70620e07-d8a0-47f5-bd2f-8d1c84a50c2d" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Next Page Token = CfDJ8OqP5ZkTT9ZGj66XXRbxXyAum_75qo2ORdFdjBKnb2XktwZtgYwPc79Fj0H8ETrqYRm35yEs_mdLGDUP7txk0cfA7dvSOnp24NvUsf8I5dCG1futcaouZAqMo4gRSGpaBLgpt2tQIGyudTzkPXr4ARKdBB0AxZepoFQlczqr0reYnPjiVrc\n", "name size creationDate \n", "------------------------------------------------------------------------ ------- -------------------------- \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.dev.female.trl.txt 677652 2025-10-13 11:13:31.155000 \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.dev.female.trn.txt 972 2025-10-13 11:13:31.143000 \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.dev.gi.trl.txt 976804 2025-10-13 11:13:31.148000 \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.dev.male.trl.txt 299152 2025-10-13 11:13:31.076000 \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.dev.male.trn.txt 1096 2025-10-13 11:13:31.066000 \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.eval.female.trl.txt 2371140 2025-10-13 11:13:31.119000 \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.eval.female.trn.txt 4374 2025-10-13 11:13:31.134000 \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.eval.gi.trl.txt 3512112 2025-10-13 11:13:30.985000 \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.eval.male.trl.txt 1140972 2025-10-13 11:13:31.137000 \n", "LA/ASVspoof2019_LA_asv_protocols/ASVspoof2019.LA.asv.eval.male.trn.txt 5754 2025-10-13 11:13:31.118000 \n", "LA/ASVspoof2019_LA_asv_scores/ASVspoof2019.LA.asv.dev.gi.trl.scores.txt 632703 2025-10-13 11:14:49.401000 \n", "LA/ASVspoof2019_LA_asv_scores/ASVspoof2019.LA.asv.eval.gi.trl.scores.txt 2422073 2025-10-13 11:14:49.443000 \n", "LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.dev.trl.txt 822400 2025-10-13 11:14:49.401000 \n", "LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt 2358176 2025-10-13 11:14:49.421000 \n", "LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt 840120 2025-10-13 11:14:49.461000 \n", "LA/ASVspoof2019_LA_dev/LICENSE.txt 19941 2025-10-13 11:14:49.401000 \n", "LA/ASVspoof2019_LA_dev/flac/LA_D_1000265.flac 32573 2025-10-13 11:14:40.280000 \n", "LA/ASVspoof2019_LA_dev/flac/LA_D_1000752.flac 38423 2025-10-13 11:14:14.094000 \n", "LA/ASVspoof2019_LA_dev/flac/LA_D_1001095.flac 65292 2025-10-13 11:14:27.990000 \n", "LA/ASVspoof2019_LA_dev/flac/LA_D_1002130.flac 85044 2025-10-13 11:14:40.786000 \n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "# Where the zip will land first (Colab's local fast disk, not Drive)\n", "DOWNLOAD_DIR = '/content/kaggle_download'\n", "os.makedirs(DOWNLOAD_DIR, exist_ok=True)\n", "\n", "# Download to Colab's local disk first \u2014 much faster than writing to Drive\n", "print(\"Starting download (~7.6 GB). This takes 5-15 minutes...\")\n", "!kaggle datasets download -d anishsarkar22/asvpoof-2019-dataset-la -p {DOWNLOAD_DIR} --unzip --force\n", "print(\"\\n\u2705 Download + unzip complete.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "s5yivvZdpJRS", "executionInfo": { "status": "ok", "timestamp": 1777456447312, "user_tz": 420, "elapsed": 284438, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "557ae6c4-d85d-4276-f25c-9919df50d69d" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Starting download (~7.6 GB). This takes 5-15 minutes...\n", "Dataset URL: https://www.kaggle.com/datasets/anishsarkar22/asvpoof-2019-dataset-la\n", "License(s): ODC Attribution License (ODC-By)\n", "Downloading asvpoof-2019-dataset-la.zip to /content/kaggle_download\n", "100% 7.12G/7.12G [03:26<00:00, 37.0MB/s]\n", "\n", "\n", "\u2705 Download + unzip complete.\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "DOWNLOAD_DIR = '/content/kaggle_download'\n", "print(\"Top-level contents:\")\n", "for item in sorted(os.listdir(DOWNLOAD_DIR)):\n", " full = os.path.join(DOWNLOAD_DIR, item)\n", " if os.path.isdir(full):\n", " print(f\" folder {item}/\")\n", " else:\n", " size_mb = os.path.getsize(full) / 1e6\n", " print(f\" file content {item} ({size_mb:.1f} MB)\")\n", "\n", "LA_DIR = os.path.join(DOWNLOAD_DIR, 'LA')\n", "if os.path.exists(LA_DIR):\n", " print(f\"\\nLA/ contents:\")\n", " for item in sorted(os.listdir(LA_DIR)):\n", " print(f\" folder LA/{item}/\")\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nyM7FtPjqXXX", "executionInfo": { "status": "ok", "timestamp": 1777456580774, "user_tz": 420, "elapsed": 22, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "aa4de11f-97a1-49ac-e8e2-76fcc725b323" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Top-level contents:\n", " folder LA/\n", "\n", "LA/ contents:\n", " folder LA/ASVspoof2019_LA_asv_protocols/\n", " folder LA/ASVspoof2019_LA_asv_scores/\n", " folder LA/ASVspoof2019_LA_cm_protocols/\n", " folder LA/ASVspoof2019_LA_dev/\n", " folder LA/ASVspoof2019_LA_eval/\n", " folder LA/ASVspoof2019_LA_train/\n", " folder LA/README.LA.txt/\n" ] } ] }, { "cell_type": "code", "source": [ "import glob\n", "\n", "partitions = ['train', 'dev', 'eval']\n", "expected = {'train': 25380, 'dev': 24844, 'eval': 71237}\n", "\n", "print(\"Audio file counts:\")\n", "for p in partitions:\n", " flac_dir = os.path.join(DOWNLOAD_DIR, 'LA', f'ASVspoof2019_LA_{p}', 'flac')\n", " if os.path.exists(flac_dir):\n", " n = len(glob.glob(os.path.join(flac_dir, '*.flac')))\n", " match = \"Done\" if n == expected[p] else \"Warning\"\n", " print(f\" {match} {p}: {n:,} files (expected {expected[p]:,})\")\n", " else:\n", " print(f\" {p}: folder not found\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "U_OBd01pq4CO", "executionInfo": { "status": "ok", "timestamp": 1777456644915, "user_tz": 420, "elapsed": 210, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "18bf02c6-3aec-4ac4-d959-019fc31cdc80" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Audio file counts:\n", " Done train: 25,380 files (expected 25,380)\n", " Warning dev: 24,986 files (expected 24,844)\n", " Warning eval: 71,933 files (expected 71,237)\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "import time\n", "\n", "SOURCE = '/content/kaggle_download/LA'\n", "DESTINATION = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "\n", "os.makedirs(os.path.dirname(DESTINATION), exist_ok=True)\n", "\n", "print(f\"Moving {SOURCE} -> {DESTINATION}\")\n", "print(\"This takes 15-25 minutes due to Drive's per-file overhead.\")\n", "print(\"Don't close the tab. Progress will print as it goes.\\n\")\n", "\n", "start = time.time()\n", "# rsync is more reliable than cp for large transfers\n", "!rsync -ah --info=progress2 {SOURCE}/ {DESTINATION}/\n", "\n", "elapsed = (time.time() - start) / 60\n", "print(f\"\\nMove complete in {elapsed:.1f} minutes.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ZWnMyY0BraAf", "executionInfo": { "status": "ok", "timestamp": 1777456879390, "user_tz": 420, "elapsed": 85135, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "8c3cf64d-aa4f-4538-b5d4-dd818596116f" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Moving /content/kaggle_download/LA -> /content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA\n", "This takes 15-25 minutes due to Drive's per-file overhead.\n", "Don't close the tab. Progress will print as it goes.\n", "\n", " 7.66G 100% 85.99MB/s 0:01:24 (xfr#122318, to-chk=0/122328)\n", "\n", "Move complete in 1.4 minutes.\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "import glob\n", "\n", "DRIVE_LA = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "\n", "# Re-count files on Drive\n", "partitions = ['train', 'dev', 'eval']\n", "expected = {'train': 25380, 'dev': 24986, 'eval': 71933} # using actual counts now\n", "\n", "print(\"Verifying files on Drive:\")\n", "all_ok = True\n", "for p in partitions:\n", " flac_dir = os.path.join(DRIVE_LA, f'ASVspoof2019_LA_{p}', 'flac')\n", " if os.path.exists(flac_dir):\n", " n = len(glob.glob(os.path.join(flac_dir, '*.flac')))\n", " match = \"OK\" if n == expected[p] else \"MISMATCH\"\n", " print(f\" [{match}] {p}: {n:,} files\")\n", " if n != expected[p]:\n", " all_ok = False\n", " else:\n", " print(f\" [MISSING] {p}: folder not found\")\n", " all_ok = False\n", "\n", "# Verify protocol files are also there\n", "proto_dir = os.path.join(DRIVE_LA, 'ASVspoof2019_LA_cm_protocols')\n", "print(f\"\\nProtocol folder: {proto_dir}\")\n", "if os.path.exists(proto_dir):\n", " for f in sorted(os.listdir(proto_dir)):\n", " size_kb = os.path.getsize(os.path.join(proto_dir, f)) / 1024\n", " print(f\" - {f} ({size_kb:.1f} KB)\")\n", "else:\n", " print(\" MISSING\")\n", " all_ok = False\n", "\n", "print(f\"\\nOverall: {'OK' if all_ok else 'PROBLEM - do not delete local copy yet'}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Xk25n4VUs15X", "executionInfo": { "status": "ok", "timestamp": 1777457131668, "user_tz": 420, "elapsed": 115, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "02b452db-e409-43d5-e0b5-5a1a945741ac" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Verifying files on Drive:\n", " [OK] train: 25,380 files\n", " [OK] dev: 24,986 files\n", " [OK] eval: 71,933 files\n", "\n", "Protocol folder: /content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA/ASVspoof2019_LA_cm_protocols\n", " - ASVspoof2019.LA.cm.dev.trl.txt (803.1 KB)\n", " - ASVspoof2019.LA.cm.eval.trl.txt (2302.9 KB)\n", " - ASVspoof2019.LA.cm.train.trn.txt (820.4 KB)\n", "\n", "Overall: OK\n" ] } ] }, { "cell_type": "code", "source": [ "import shutil\n", "\n", "LOCAL_DOWNLOAD = '/content/kaggle_download'\n", "\n", "if os.path.exists(LOCAL_DOWNLOAD):\n", " print(f\"Removing {LOCAL_DOWNLOAD}...\")\n", " shutil.rmtree(LOCAL_DOWNLOAD)\n", " print(\"Done.\")\n", "else:\n", " print(\"Already removed.\")\n", "\n", "!df -h /content | tail -1" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AKucTc2ktCWD", "executionInfo": { "status": "ok", "timestamp": 1777457184947, "user_tz": 420, "elapsed": 3621, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "86bad200-f4dc-46c8-b9a7-917acd1c04c7" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Removing /content/kaggle_download...\n", "Done.\n", "overlay 236G 51G 186G 22% /\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "DRIVE_ROOT = '/content/drive/MyDrive'\n", "if os.path.exists(DRIVE_ROOT):\n", " print(\"Drive already mounted.\")\n", "else:\n", " from google.colab import drive\n", " drive.mount('/content/drive')\n", "\n", "DRIVE_LA = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "print(f\"\\nData folder exists: {os.path.exists(DRIVE_LA)}\")\n", "print(f\"Contents: {sorted(os.listdir(DRIVE_LA))}\")\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ZlfhUYCkteEh", "executionInfo": { "status": "ok", "timestamp": 1777457380469, "user_tz": 420, "elapsed": 11, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "b4019443-a270-4757-a01e-8d05064f3e26" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Drive already mounted.\n", "\n", "Data folder exists: True\n", "Contents: ['ASVspoof2019_LA_asv_protocols', 'ASVspoof2019_LA_asv_scores', 'ASVspoof2019_LA_cm_protocols', 'ASVspoof2019_LA_dev', 'ASVspoof2019_LA_eval', 'ASVspoof2019_LA_train', 'README.LA.txt']\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "PROTO_DIR = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA/ASVspoof2019_LA_cm_protocols'\n", "TRAIN_PROTO = os.path.join(PROTO_DIR, 'ASVspoof2019.LA.cm.train.trn.txt')\n", "\n", "size_mb = os.path.getsize(TRAIN_PROTO) / 1024\n", "print(f\"File: {TRAIN_PROTO}\")\n", "print(f\"Size: {size_mb:.1f} KB\\n\")\n", "\n", "print(\"First 10 lines:\")\n", "print(\"-\" * 70)\n", "with open(TRAIN_PROTO, 'r') as f:\n", " for i, line in enumerate(f):\n", " if i >= 10:\n", " break\n", " print(repr(line))\n", "print(\"-\" * 70)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "XVo_1KtUt5Rm", "executionInfo": { "status": "ok", "timestamp": 1777457409784, "user_tz": 420, "elapsed": 8, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "9b61a25d-176a-4e04-e837-5f87d83db3dc" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "File: /content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt\n", "Size: 820.4 KB\n", "\n", "First 10 lines:\n", "----------------------------------------------------------------------\n", "'LA_0079 LA_T_1138215 - - bonafide\\n'\n", "'LA_0079 LA_T_1271820 - - bonafide\\n'\n", "'LA_0079 LA_T_1272637 - - bonafide\\n'\n", "'LA_0079 LA_T_1276960 - - bonafide\\n'\n", "'LA_0079 LA_T_1341447 - - bonafide\\n'\n", "'LA_0079 LA_T_1363611 - - bonafide\\n'\n", "'LA_0079 LA_T_1596451 - - bonafide\\n'\n", "'LA_0079 LA_T_1608170 - - bonafide\\n'\n", "'LA_0079 LA_T_1684951 - - bonafide\\n'\n", "'LA_0079 LA_T_1699801 - - bonafide\\n'\n", "----------------------------------------------------------------------\n" ] } ] }, { "cell_type": "markdown", "source": [ "so bonafide if human and spoof if ai generated" ], "metadata": { "id": "5YoDt8FBuJH_" } }, { "cell_type": "code", "source": [ "print(\"Looking for spoof samples (with attack ID):\\n\")\n", "print(\"-\" * 70)\n", "count = 0\n", "with open(TRAIN_PROTO, 'r') as f:\n", " for line in f:\n", " if 'spoof' in line:\n", " print(repr(line))\n", " count += 1\n", " if count >= 5:\n", " break\n", "print(\"-\" * 70)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "sETPx1QYuITZ", "executionInfo": { "status": "ok", "timestamp": 1777457493899, "user_tz": 420, "elapsed": 13, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "60a57595-e8b7-4789-993e-18151a78daf3" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking for spoof samples (with attack ID):\n", "\n", "----------------------------------------------------------------------\n", "'LA_0079 LA_T_1004644 - A01 spoof\\n'\n", "'LA_0079 LA_T_1056709 - A01 spoof\\n'\n", "'LA_0079 LA_T_1195221 - A01 spoof\\n'\n", "'LA_0079 LA_T_1265032 - A01 spoof\\n'\n", "'LA_0079 LA_T_1287124 - A01 spoof\\n'\n", "----------------------------------------------------------------------\n" ] } ] }, { "cell_type": "code", "source": [ "from collections import Counter\n", "\n", "label_counts = Counter()\n", "attack_counts = Counter()\n", "\n", "with open(TRAIN_PROTO, 'r') as f:\n", " for line in f:\n", " parts = line.strip().split()\n", " if len(parts) != 5:\n", " continue\n", " speaker, utt_id, _, attack_id, label = parts\n", " label_counts[label] += 1\n", " attack_counts[attack_id] += 1\n", "\n", "print(\"Label distribution (train):\")\n", "for label, n in label_counts.most_common():\n", " print(f\" {label}: {n:,}\")\n", "\n", "print(f\"\\nTotal: {sum(label_counts.values()):,}\")\n", "print(f\"Imbalance ratio: 1 bonafide : {label_counts['spoof'] / label_counts['bonafide']:.1f} spoof\\n\")\n", "\n", "print(\"Attack ID distribution (train):\")\n", "for attack, n in sorted(attack_counts.items()):\n", " label = \"bonafide\" if attack == \"-\" else \"spoof\"\n", " print(f\" {attack}: {n:,} ({label})\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yutzZ_VhuXtK", "executionInfo": { "status": "ok", "timestamp": 1777457531206, "user_tz": 420, "elapsed": 66, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "43180406-ceaa-41f9-f8c9-848c7120400e" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Label distribution (train):\n", " spoof: 22,800\n", " bonafide: 2,580\n", "\n", "Total: 25,380\n", "Imbalance ratio: 1 bonafide : 8.8 spoof\n", "\n", "Attack ID distribution (train):\n", " -: 2,580 (bonafide)\n", " A01: 3,800 (spoof)\n", " A02: 3,800 (spoof)\n", " A03: 3,800 (spoof)\n", " A04: 3,800 (spoof)\n", " A05: 3,800 (spoof)\n", " A06: 3,800 (spoof)\n" ] } ] }, { "cell_type": "markdown", "source": [ "####this demonstrates:\n", "Class imbalance: 1 bona fide for every 8.8 spoofed samples.\n", "If you train a model and just optimize for \"raw accuracy,\" it will quickly learn the laziest possible strategy: always predict \"spoof.\" That gets it ~90% accuracy without learning anything about audio. This is precisely why your proposal called out weighted loss and why anti-spoofing research uses EER instead of accuracy. You can now articulate this from your own data, not from a textbook claim.\n", "Attack distribution: perfectly balanced across A01-A06.\n", "Each of the six training attacks has exactly 3,800 samples. This was a deliberate design choice by the ASVspoof organizers \u2014 they wanted no attack type to dominate. Important implication: the model can't shortcut by memorizing one specific attack's artifacts.\n", "A01-A06 are training attacks. A07-A19 will appear in eval.\n", "The eval set contains attacks the model has never seen during training. This is the entire point of the ASVspoof 2019 LA challenge: can your model generalize to unseen attack types? When you eventually report per-attack EER on eval, you'll see different attacks the model handles differently." ], "metadata": { "id": "lav4fTp-urY1" } }, { "cell_type": "code", "source": [ "import torchaudio\n", "from IPython.display import Audio, display\n", "\n", "TRAIN_FLAC_DIR = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA/ASVspoof2019_LA_train/flac'\n", "\n", "# A bonafide sample (from the protocol output above)\n", "bonafide_path = f\"{TRAIN_FLAC_DIR}/LA_T_1138215.flac\"\n", "\n", "print(\"=\" * 60)\n", "print(\"BONAFIDE SAMPLE: LA_T_1138215.flac\")\n", "print(\"=\" * 60)\n", "waveform, sr = torchaudio.load(bonafide_path)\n", "print(f\"Shape: {waveform.shape} (channels x samples)\")\n", "print(f\"Sample rate: {sr} Hz\")\n", "print(f\"Duration: {waveform.shape[1] / sr:.2f} seconds\")\n", "print(f\"Min/max amplitude: {waveform.min():.3f} / {waveform.max():.3f}\")\n", "display(Audio(waveform.numpy(), rate=sr))\n", "\n", "print(\"\\n\" + \"=\" * 60)\n", "print(\"SPOOF SAMPLE (attack A01): LA_T_1004644.flac\")\n", "print(\"=\" * 60)\n", "spoof_path = f\"{TRAIN_FLAC_DIR}/LA_T_1004644.flac\"\n", "waveform_s, sr_s = torchaudio.load(spoof_path)\n", "print(f\"Shape: {waveform_s.shape}\")\n", "print(f\"Sample rate: {sr_s} Hz\")\n", "print(f\"Duration: {waveform_s.shape[1] / sr_s:.2f} seconds\")\n", "print(f\"Min/max amplitude: {waveform_s.min():.3f} / {waveform_s.max():.3f}\")\n", "display(Audio(waveform_s.numpy(), rate=sr_s))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 394 }, "id": "kApOQeiFu7go", "executionInfo": { "status": "ok", "timestamp": 1777457692312, "user_tz": 420, "elapsed": 12711, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "7503a2f0-8223-402a-bfec-9d8a8d4f4101" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "============================================================\n", "BONAFIDE SAMPLE: LA_T_1138215.flac\n", "============================================================\n", "Shape: torch.Size([1, 55329]) (channels x samples)\n", "Sample rate: 16000 Hz\n", "Duration: 3.46 seconds\n", "Min/max amplitude: -1.000 / 0.974\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "\n", "============================================================\n", "SPOOF SAMPLE (attack A01): LA_T_1004644.flac\n", "============================================================\n", "Shape: torch.Size([1, 30769])\n", "Sample rate: 16000 Hz\n", "Duration: 1.92 seconds\n", "Min/max amplitude: -1.000 / 0.611\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " " ] }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "What you just experienced is the entire point of the field\n", "Modern voice cloning is good enough that human ears are unreliable judges. Researchers have run formal studies on this \u2014 when shown bona fide vs spoofed clips, untrained listeners get accuracy in the 55-70% range (barely above the 50% random-guess baseline for binary classification). With training and headphones, it goes up some, but not dramatically.\n", "This isn't a flaw in your hearing. It's the threat. Voice cloning fraud works precisely because humans can't tell.\n", "Why the bona fide sounded \"more AI\" to you\n", "A few possibilities, all interesting:\n", "\n", "Bona fide ASVspoof recordings were studio-recorded. The bona fide samples were collected in clean recording booths with high-quality mics \u2014 they sound unnaturally clean, no room reverb, no background noise. Real conversations don't sound like that. So clean audio can register to your brain as \"fake-sounding\" because it lacks the imperfections we expect.\n", "Modern TTS often adds learned naturalness. Newer synthesis systems (some of A01-A06) were trained on data that included natural speech patterns \u2014 small breaths, hesitations, micro-imperfections. They can sound more like everyday speech than a sterile studio recording.\n", "You don't know what the speaker normally sounds like. When we judge \"this sounds AI,\" we're often comparing against an internal expectation of what natural speech sounds like \u2014 and that internal model is wrong about studio audio.\n", "\n", "Why this matters for your project\n", "This is genuinely useful for your report's introduction and discussion sections. You can write something like:\n", "\n", "\"Informal listening tests during dataset exploration revealed that spoofed samples in ASVspoof 2019 LA can sound more natural to a casual listener than the corresponding studio-recorded bona fide samples. This observation motivates the need for automated detection systems that can identify subtle synthesis artifacts not perceptible to the human ear.\"\n", "\n", "That's a strong, honest opening framing. It comes from your own engagement with the data, not a textbook claim.\n", "What the model will actually use\n", "Your model \u2014 and any anti-spoofing system \u2014 doesn't rely on \"does this sound natural?\" It looks for specific artifacts of the synthesis process:\n", "\n", "Phase inconsistencies \u2014 synthesizers can produce phase relationships that don't occur in natural speech\n", "High-frequency artifacts \u2014 neural vocoders sometimes leave checkerboard patterns or dead bands above ~7 kHz\n", "Spectral envelope anomalies \u2014 formant transitions that are slightly too smooth or too sharp\n", "Pitch periodicity \u2014 overly regular pitch contours\n", "\n", "These are below the threshold of human hearing in many cases. Wav2Vec 2.0 sees the raw waveform and learns to detect them.\n" ], "metadata": { "id": "FrC1gkh51yRa" } }, { "cell_type": "code", "source": [ "from dataclasses import dataclass\n", "from typing import List\n", "import os\n", "\n", "@dataclass\n", "class Utterance:\n", " \"\"\"One row from a protocol file.\"\"\"\n", " speaker_id: str # e.g., \"LA_0079\"\n", " utterance_id: str # e.g., \"LA_T_1138215\"\n", " attack_id: str # \"-\" for bonafide, \"A01\"-\"A19\" for spoof\n", " label: str # \"bonafide\" or \"spoof\"\n", " label_int: int # 0 = bonafide, 1 = spoof\n", " flac_path: str # full path to the .flac file\n", "\n", "\n", "def parse_protocol(protocol_path: str, audio_root: str) -> List[Utterance]:\n", " \"\"\"\n", " Parse an ASVspoof 2019 LA cm protocol file.\n", "\n", " Args:\n", " protocol_path: full path to the .txt protocol file\n", " audio_root: full path to the folder containing .flac files\n", " (e.g., .../ASVspoof2019_LA_train/flac)\n", "\n", " Returns:\n", " List of Utterance objects.\n", " \"\"\"\n", " utterances = []\n", " with open(protocol_path, 'r') as f:\n", " for line_num, line in enumerate(f, start=1):\n", " parts = line.strip().split()\n", " if len(parts) != 5:\n", " # Skip malformed lines\n", " continue\n", " speaker_id, utt_id, _unused, attack_id, label = parts\n", "\n", " label_int = 0 if label == 'bonafide' else 1\n", "\n", " flac_path = os.path.join(audio_root, f\"{utt_id}.flac\")\n", "\n", " utterances.append(Utterance(\n", " speaker_id=speaker_id,\n", " utterance_id=utt_id,\n", " attack_id=attack_id,\n", " label=label,\n", " label_int=label_int,\n", " flac_path=flac_path,\n", " ))\n", " return utterances\n", "\n", "LA_ROOT = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "train_proto = f\"{LA_ROOT}/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt\"\n", "train_audio = f\"{LA_ROOT}/ASVspoof2019_LA_train/flac\"\n", "\n", "train_utterances = parse_protocol(train_proto, train_audio)\n", "\n", "print(f\"Parsed {len(train_utterances):,} utterances from training protocol\\n\")\n", "print(\"First 3 entries:\")\n", "for u in train_utterances[:3]:\n", " print(f\" {u}\")\n", "print(f\"\\nFirst spoof entry:\")\n", "for u in train_utterances:\n", " if u.label == 'spoof':\n", " print(f\" {u}\")\n", " break" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "I9f-TvWJ1x4-", "executionInfo": { "status": "ok", "timestamp": 1777459498497, "user_tz": 420, "elapsed": 63, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "f58e3e8c-eecf-47d8-8eee-564ddc9bbe4d" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Parsed 25,380 utterances from training protocol\n", "\n", "First 3 entries:\n", " Utterance(speaker_id='LA_0079', utterance_id='LA_T_1138215', attack_id='-', label='bonafide', label_int=0, flac_path='/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA/ASVspoof2019_LA_train/flac/LA_T_1138215.flac')\n", " Utterance(speaker_id='LA_0079', utterance_id='LA_T_1271820', attack_id='-', label='bonafide', label_int=0, flac_path='/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA/ASVspoof2019_LA_train/flac/LA_T_1271820.flac')\n", " Utterance(speaker_id='LA_0079', utterance_id='LA_T_1272637', attack_id='-', label='bonafide', label_int=0, flac_path='/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA/ASVspoof2019_LA_train/flac/LA_T_1272637.flac')\n", "\n", "First spoof entry:\n", " Utterance(speaker_id='LA_0079', utterance_id='LA_T_1004644', attack_id='A01', label='spoof', label_int=1, flac_path='/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA/ASVspoof2019_LA_train/flac/LA_T_1004644.flac')\n" ] } ] }, { "cell_type": "markdown", "source": [ "LA_0079 appears in both bonafide and spoof entries. That's intentional in ASVspoof \u2014 the same speaker has both real recordings AND spoofed versions (synthesizers were given samples of LA_0079's voice and asked to mimic them). The model needs to learn \"this is fake regardless of who it claims to be" ], "metadata": { "id": "LKDEMzQ02Dbp" } }, { "cell_type": "code", "source": [ "def parse_all_partitions(la_root: str):\n", " \"\"\"Parse train, dev, and eval protocol files into a dict.\"\"\"\n", " proto_dir = f\"{la_root}/ASVspoof2019_LA_cm_protocols\"\n", "\n", " partitions = {\n", " 'train': {\n", " 'proto': f\"{proto_dir}/ASVspoof2019.LA.cm.train.trn.txt\",\n", " 'audio': f\"{la_root}/ASVspoof2019_LA_train/flac\",\n", " },\n", " 'dev': {\n", " 'proto': f\"{proto_dir}/ASVspoof2019.LA.cm.dev.trl.txt\",\n", " 'audio': f\"{la_root}/ASVspoof2019_LA_dev/flac\",\n", " },\n", " 'eval': {\n", " 'proto': f\"{proto_dir}/ASVspoof2019.LA.cm.eval.trl.txt\",\n", " 'audio': f\"{la_root}/ASVspoof2019_LA_eval/flac\",\n", " },\n", " }\n", "\n", " result = {}\n", " for name, paths in partitions.items():\n", " utterances = parse_protocol(paths['proto'], paths['audio'])\n", " result[name] = utterances\n", " return result\n", "\n", "\n", "# Parse everything\n", "splits = parse_all_partitions(LA_ROOT)\n", "\n", "# Summary table\n", "print(f\"{'Partition':<10} {'Total':>10} {'Bonafide':>10} {'Spoof':>10} {'Ratio':>10}\")\n", "print(\"-\" * 56)\n", "for name, utts in splits.items():\n", " n_total = len(utts)\n", " n_bonafide = sum(1 for u in utts if u.label == 'bonafide')\n", " n_spoof = n_total - n_bonafide\n", " ratio = n_spoof / n_bonafide if n_bonafide > 0 else 0\n", " print(f\"{name:<10} {n_total:>10,} {n_bonafide:>10,} {n_spoof:>10,} {ratio:>9.1f}x\")\n", "\n", "# Show attack ID distribution per partition\n", "from collections import Counter\n", "print(\"\\nAttack IDs per partition:\")\n", "for name, utts in splits.items():\n", " attack_counts = Counter(u.attack_id for u in utts if u.attack_id != '-')\n", " attacks = sorted(attack_counts.keys())\n", " print(f\" {name}: {attacks}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "79qMmYEJ2HJG", "executionInfo": { "status": "ok", "timestamp": 1777459560581, "user_tz": 420, "elapsed": 411, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "c6db161c-7213-4117-b924-c11043663fdb" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Partition Total Bonafide Spoof Ratio\n", "--------------------------------------------------------\n", "train 25,380 2,580 22,800 8.8x\n", "dev 24,844 2,548 22,296 8.8x\n", "eval 71,237 7,355 63,882 8.7x\n", "\n", "Attack IDs per partition:\n", " train: ['A01', 'A02', 'A03', 'A04', 'A05', 'A06']\n", " dev: ['A01', 'A02', 'A03', 'A04', 'A05', 'A06']\n", " eval: ['A07', 'A08', 'A09', 'A10', 'A11', 'A12', 'A13', 'A14', 'A15', 'A16', 'A17', 'A18', 'A19']\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "# Verify the repo is cloned in Colab\n", "REPO_DIR = '/content/deepfake-audio-detection'\n", "if not os.path.exists(REPO_DIR):\n", " print(\"Repo not found. Cloning...\")\n", " !git clone https://github.com/Saracasm/deepfake-audio-detection.git {REPO_DIR}\n", "else:\n", " print(f\"Repo found at {REPO_DIR}\")\n", " !cd {REPO_DIR} && git pull --quiet\n", "\n", "# Make sure src/data/ exists with __init__.py\n", "os.makedirs(f'{REPO_DIR}/src/data', exist_ok=True)\n", "for init_path in [f'{REPO_DIR}/src/__init__.py', f'{REPO_DIR}/src/data/__init__.py']:\n", " if not os.path.exists(init_path):\n", " open(init_path, 'w').close()\n", "\n", "print(f\"Repo ready at {REPO_DIR}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "1bJm86ld2SnT", "executionInfo": { "status": "ok", "timestamp": 1777459608459, "user_tz": 420, "elapsed": 979, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "fb30565b-695e-4159-dfe3-db66ffd7ffa6" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Repo not found. Cloning...\n", "Cloning into '/content/deepfake-audio-detection'...\n", "remote: Enumerating objects: 36, done.\u001b[K\n", "remote: Counting objects: 100% (36/36), done.\u001b[K\n", "remote: Compressing objects: 100% (31/31), done.\u001b[K\n", "remote: Total 36 (delta 12), reused 0 (delta 0), pack-reused 0 (from 0)\u001b[K\n", "Receiving objects: 100% (36/36), 18.88 KiB | 2.36 MiB/s, done.\n", "Resolving deltas: 100% (12/12), done.\n", "Repo ready at /content/deepfake-audio-detection\n" ] } ] }, { "cell_type": "code", "source": [ "PROTOCOLS_PY = '''\"\"\"\n", "ASVspoof 2019 LA protocol parser.\n", "\n", "Reads the official .txt protocol files and yields structured Utterance objects\n", "that downstream code (datasets, evaluation) can use.\n", "\n", "Protocol file format (5 space-separated columns):\n", " speaker_id utterance_id - attack_id label\n", "\n", " speaker_id : anonymized speaker (e.g., \"LA_0079\")\n", " utterance_id : filename without extension (e.g., \"LA_T_1138215\")\n", " column 3 : unused, always \"-\"\n", " attack_id : \"-\" for bonafide, \"A01\"-\"A19\" for spoof samples\n", " label : \"bonafide\" or \"spoof\"\n", "\"\"\"\n", "\n", "from dataclasses import dataclass\n", "from typing import List, Dict\n", "import os\n", "\n", "\n", "@dataclass\n", "class Utterance:\n", " \"\"\"One row from an ASVspoof 2019 LA protocol file.\"\"\"\n", " speaker_id: str\n", " utterance_id: str\n", " attack_id: str # \"-\" for bonafide, \"A01\"-\"A19\" for spoof\n", " label: str # \"bonafide\" or \"spoof\"\n", " label_int: int # 0 = bonafide, 1 = spoof\n", " flac_path: str # absolute path to the .flac file\n", "\n", "\n", "def parse_protocol(protocol_path: str, audio_root: str) -> List[Utterance]:\n", " \"\"\"Parse one ASVspoof 2019 LA cm protocol file.\n", "\n", " Args:\n", " protocol_path: full path to the .txt protocol file.\n", " audio_root: full path to the folder containing the .flac files.\n", "\n", " Returns:\n", " List of Utterance objects, one per valid line.\n", " \"\"\"\n", " utterances: List[Utterance] = []\n", " with open(protocol_path, \"r\") as f:\n", " for line in f:\n", " parts = line.strip().split()\n", " if len(parts) != 5:\n", " continue\n", " speaker_id, utt_id, _unused, attack_id, label = parts\n", " label_int = 0 if label == \"bonafide\" else 1\n", " flac_path = os.path.join(audio_root, f\"{utt_id}.flac\")\n", " utterances.append(Utterance(\n", " speaker_id=speaker_id,\n", " utterance_id=utt_id,\n", " attack_id=attack_id,\n", " label=label,\n", " label_int=label_int,\n", " flac_path=flac_path,\n", " ))\n", " return utterances\n", "\n", "\n", "def parse_all_partitions(la_root: str) -> Dict[str, List[Utterance]]:\n", " \"\"\"Parse train, dev, and eval protocols at once.\n", "\n", " Args:\n", " la_root: path to the LA folder, e.g.\n", " \".../asvspoof_2019/LA\"\n", "\n", " Returns:\n", " Dict with keys \"train\", \"dev\", \"eval\" mapping to lists of Utterances.\n", " \"\"\"\n", " proto_dir = os.path.join(la_root, \"ASVspoof2019_LA_cm_protocols\")\n", " partitions = {\n", " \"train\": (\n", " os.path.join(proto_dir, \"ASVspoof2019.LA.cm.train.trn.txt\"),\n", " os.path.join(la_root, \"ASVspoof2019_LA_train\", \"flac\"),\n", " ),\n", " \"dev\": (\n", " os.path.join(proto_dir, \"ASVspoof2019.LA.cm.dev.trl.txt\"),\n", " os.path.join(la_root, \"ASVspoof2019_LA_dev\", \"flac\"),\n", " ),\n", " \"eval\": (\n", " os.path.join(proto_dir, \"ASVspoof2019.LA.cm.eval.trl.txt\"),\n", " os.path.join(la_root, \"ASVspoof2019_LA_eval\", \"flac\"),\n", " ),\n", " }\n", " return {\n", " name: parse_protocol(proto, audio)\n", " for name, (proto, audio) in partitions.items()\n", " }\n", "\n", "\n", "def class_counts(utterances: List[Utterance]) -> Dict[str, int]:\n", " \"\"\"Return {'bonafide': N, 'spoof': M} counts.\"\"\"\n", " counts = {\"bonafide\": 0, \"spoof\": 0}\n", " for u in utterances:\n", " counts[u.label] += 1\n", " return counts\n", "'''\n", "\n", "PATH = '/content/deepfake-audio-detection/src/data/protocols.py'\n", "with open(PATH, 'w') as f:\n", " f.write(PROTOCOLS_PY)\n", "\n", "print(f\"Wrote {PATH}\")\n", "print(f\"Size: {os.path.getsize(PATH)} bytes\")\n", "print(f\"Lines: {len(PROTOCOLS_PY.splitlines())}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "qJw5AToQ2c-8", "executionInfo": { "status": "ok", "timestamp": 1777459649341, "user_tz": 420, "elapsed": 28, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "9d3804d2-b0e6-448a-89aa-d40a0020a59c" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Wrote /content/deepfake-audio-detection/src/data/protocols.py\n", "Size: 3284 bytes\n", "Lines: 99\n" ] } ] }, { "cell_type": "code", "source": [ "import sys\n", "import importlib\n", "\n", "# Add repo to Python path so we can import from src/\n", "sys.path.insert(0, '/content/deepfake-audio-detection')\n", "\n", "# Force reload in case we re-run\n", "if 'src.data.protocols' in sys.modules:\n", " importlib.reload(sys.modules['src.data.protocols'])\n", "\n", "from src.data.protocols import parse_all_partitions, class_counts\n", "\n", "# Re-parse using the module\n", "splits = parse_all_partitions(LA_ROOT)\n", "\n", "print(\"Re-parsed via module:\")\n", "for name, utts in splits.items():\n", " counts = class_counts(utts)\n", " print(f\" {name}: {len(utts):,} total ({counts['bonafide']:,} bonafide, {counts['spoof']:,} spoof)\")\n", "\n", "print(\"\\nModule works.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Cc8sUveV2hfL", "executionInfo": { "status": "ok", "timestamp": 1777459667997, "user_tz": 420, "elapsed": 514, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "b4f97e20-3e1d-441e-d723-24509dac0e3f" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Re-parsed via module:\n", " train: 25,380 total (2,580 bonafide, 22,800 spoof)\n", " dev: 24,844 total (2,548 bonafide, 22,296 spoof)\n", " eval: 71,237 total (7,355 bonafide, 63,882 spoof)\n", "\n", "Module works.\n" ] } ] }, { "cell_type": "code", "source": [ "!cd /content/deepfake-audio-detection && git config user.email \"95262824+Saracasm@users.noreply.github.com\" && git config user.name \"Sara Iqbal\"\n", "print(\"Git identity configured.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TtNQ5Oek4JcN", "executionInfo": { "status": "ok", "timestamp": 1777460096342, "user_tz": 420, "elapsed": 114, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "416cd33c-b3cc-4531-a4c7-67788113ce46" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Git identity configured.\n" ] } ] }, { "cell_type": "code", "source": [ "%cd /content/deepfake-audio-detection\n", "!git add src/data/protocols.py src/__init__.py src/data/__init__.py\n", "!git status" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2W0E-3Aq4MaB", "executionInfo": { "status": "ok", "timestamp": 1777460105849, "user_tz": 420, "elapsed": 297, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "6e61beec-7d39-403d-97e6-362b58aec515" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "/content/deepfake-audio-detection\n", "On branch main\n", "Your branch is up to date with 'origin/main'.\n", "\n", "Changes to be committed:\n", " (use \"git restore --staged ...\" to unstage)\n", "\t\u001b[32mnew file: src/__init__.py\u001b[m\n", "\t\u001b[32mnew file: src/data/__init__.py\u001b[m\n", "\t\u001b[32mnew file: src/data/protocols.py\u001b[m\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "!git commit -m \"Phase 2: add ASVspoof 2019 LA protocol parser\"" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "XRtviGEK4cNy", "executionInfo": { "status": "ok", "timestamp": 1777460171389, "user_tz": 420, "elapsed": 271, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "108a1704-3a97-4b02-a080-f44b530f95e1" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[main a14fd25] Phase 2: add ASVspoof 2019 LA protocol parser\n", " 3 files changed, 99 insertions(+)\n", " create mode 100644 src/__init__.py\n", " create mode 100644 src/data/__init__.py\n", " create mode 100644 src/data/protocols.py\n" ] } ] }, { "cell_type": "code", "source": [ "GITHUB_TOKEN = \"REDACTED_TOKEN\"\n", "GITHUB_USER = \"Saracasm\"\n", "REPO = \"deepfake-audio-detection\"\n", "\n", "push_url = f\"https://{GITHUB_USER}:{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{REPO}.git\"\n", "!cd /content/deepfake-audio-detection && git push {push_url} main\n", "\n", "print(\"\\nDone.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yI30_my94fxE", "executionInfo": { "status": "ok", "timestamp": 1777460486259, "user_tz": 420, "elapsed": 925, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "2e2f13cc-8952-4fe5-caad-a7ea505a2256" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Enumerating objects: 7, done.\n", "Counting objects: 14% (1/7)\rCounting objects: 28% (2/7)\rCounting objects: 42% (3/7)\rCounting objects: 57% (4/7)\rCounting objects: 71% (5/7)\rCounting objects: 85% (6/7)\rCounting objects: 100% (7/7)\rCounting objects: 100% (7/7), done.\n", "Delta compression using up to 2 threads\n", "Compressing objects: 20% (1/5)\rCompressing objects: 40% (2/5)\rCompressing objects: 60% (3/5)\rCompressing objects: 80% (4/5)\rCompressing objects: 100% (5/5)\rCompressing objects: 100% (5/5), done.\n", "Writing objects: 16% (1/6)\rWriting objects: 33% (2/6)\rWriting objects: 50% (3/6)\rWriting objects: 66% (4/6)\rWriting objects: 83% (5/6)\rWriting objects: 100% (6/6)\rWriting objects: 100% (6/6), 1.58 KiB | 1.58 MiB/s, done.\n", "Total 6 (delta 1), reused 0 (delta 0), pack-reused 0\n", "remote: Resolving deltas: 100% (1/1), completed with 1 local object.\u001b[K\n", "To https://github.com/Saracasm/deepfake-audio-detection.git\n", " d9f77a5..a14fd25 main -> main\n", "\n", "Done.\n" ] } ] }, { "cell_type": "code", "source": [ "GITHUB_TOKEN = \"REDACTED_TOKEN\"" ], "metadata": { "id": "DNISx0OS6BBH" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import torchaudio\n", "import numpy as np\n", "from tqdm import tqdm\n", "import random\n", "\n", "# Sample 500 random training clips and measure durations\n", "random.seed(42)\n", "sample = random.sample(splits['train'], 500)\n", "\n", "durations = []\n", "for u in tqdm(sample, desc=\"Measuring durations\"):\n", " # Load audio (this works across all torchaudio versions)\n", " waveform, sr = torchaudio.load(u.flac_path)\n", " duration_sec = waveform.shape[1] / sr\n", " durations.append(duration_sec)\n", "\n", "durations = np.array(durations)\n", "\n", "print(f\"\\nDuration statistics over {len(durations)} random training clips:\")\n", "print(f\" Min: {durations.min():.2f} sec\")\n", "print(f\" Max: {durations.max():.2f} sec\")\n", "print(f\" Mean: {durations.mean():.2f} sec\")\n", "print(f\" Median: {np.median(durations):.2f} sec\")\n", "print(f\" Std dev: {durations.std():.2f} sec\")\n", "print(f\"\\nPercentiles:\")\n", "print(f\" 10%: {np.percentile(durations, 10):.2f} sec\")\n", "print(f\" 25%: {np.percentile(durations, 25):.2f} sec\")\n", "print(f\" 50%: {np.percentile(durations, 50):.2f} sec\")\n", "print(f\" 75%: {np.percentile(durations, 75):.2f} sec\")\n", "print(f\" 90%: {np.percentile(durations, 90):.2f} sec\")\n", "print(f\"\\nClips shorter than 4 sec: {(durations < 4).sum()} / {len(durations)} ({100*(durations < 4).mean():.1f}%)\")\n", "print(f\"Clips longer than 4 sec: {(durations > 4).sum()} / {len(durations)} ({100*(durations > 4).mean():.1f}%)\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NreeAb776sTS", "executionInfo": { "status": "ok", "timestamp": 1777460839544, "user_tz": 420, "elapsed": 1214, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "ff0f68d2-d2ca-4011-d6ad-1c93afb3d002" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Measuring durations: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 500/500 [00:01<00:00, 453.95it/s]" ] }, { "output_type": "stream", "name": "stdout", "text": [ "\n", "Duration statistics over 500 random training clips:\n", " Min: 0.82 sec\n", " Max: 10.28 sec\n", " Mean: 3.52 sec\n", " Median: 3.24 sec\n", " Std dev: 1.44 sec\n", "\n", "Percentiles:\n", " 10%: 1.93 sec\n", " 25%: 2.51 sec\n", " 50%: 3.24 sec\n", " 75%: 4.32 sec\n", " 90%: 5.48 sec\n", "\n", "Clips shorter than 4 sec: 346 / 500 (69.2%)\n", "Clips longer than 4 sec: 154 / 500 (30.8%)\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "\n" ] } ] }, { "cell_type": "code", "source": [ "PREPROCESSING_PY = '''\"\"\"\n", "Audio preprocessing for ASVspoof 2019 LA.\n", "\n", "Given a .flac path, produces a list of fixed-length 4-second segments at 16kHz\n", "mono, ready to feed into Wav2Vec 2.0.\n", "\n", "Pipeline:\n", " load .flac -> ensure mono -> ensure 16kHz -> window with 50% overlap\n", " (short clips are zero-padded to one full window)\n", "\"\"\"\n", "\n", "from typing import List\n", "import torch\n", "import torchaudio\n", "import torchaudio.functional as F\n", "\n", "\n", "SAMPLE_RATE = 16000\n", "WINDOW_SECONDS = 4.0\n", "OVERLAP_RATIO = 0.5\n", "\n", "WINDOW_SAMPLES = int(SAMPLE_RATE * WINDOW_SECONDS) # 64000\n", "HOP_SAMPLES = int(WINDOW_SAMPLES * (1.0 - OVERLAP_RATIO)) # 32000\n", "\n", "\n", "def load_audio(path: str, target_sr: int = SAMPLE_RATE) -> torch.Tensor:\n", " \"\"\"Load a .flac file and return a 1-D mono waveform at target_sr.\n", "\n", " Returns:\n", " Tensor of shape (num_samples,) at target_sr.\n", " \"\"\"\n", " waveform, sr = torchaudio.load(path) # shape: (channels, num_samples)\n", "\n", " # Ensure mono: average if multi-channel\n", " if waveform.shape[0] > 1:\n", " waveform = waveform.mean(dim=0, keepdim=True)\n", "\n", " # Ensure target sample rate\n", " if sr != target_sr:\n", " waveform = F.resample(waveform, orig_freq=sr, new_freq=target_sr)\n", "\n", " return waveform.squeeze(0) # shape: (num_samples,)\n", "\n", "\n", "def segment_waveform(\n", " waveform: torch.Tensor,\n", " window_samples: int = WINDOW_SAMPLES,\n", " hop_samples: int = HOP_SAMPLES,\n", ") -> List[torch.Tensor]:\n", " \"\"\"Split a 1-D waveform into fixed-length windows with overlap.\n", "\n", " Short clips (< window_samples) are zero-padded to one full window.\n", " Long clips are windowed with the given hop size; the last window\n", " may be partially padded if it does not fit exactly.\n", "\n", " Returns:\n", " List of tensors, each of shape (window_samples,).\n", " \"\"\"\n", " n = waveform.shape[0]\n", "\n", " # Short clip: zero-pad to one full window\n", " if n <= window_samples:\n", " padded = torch.zeros(window_samples, dtype=waveform.dtype)\n", " padded[:n] = waveform\n", " return [padded]\n", "\n", " # Long clip: walk through with hop, padding the final window if needed\n", " windows = []\n", " start = 0\n", " while start < n:\n", " end = start + window_samples\n", " if end <= n:\n", " windows.append(waveform[start:end])\n", " else:\n", " # Last window \u2014 pad with zeros up to window_samples\n", " tail = waveform[start:]\n", " padded = torch.zeros(window_samples, dtype=waveform.dtype)\n", " padded[:tail.shape[0]] = tail\n", " windows.append(padded)\n", " break # done after the last partial window\n", " start += hop_samples\n", "\n", " return windows\n", "\n", "\n", "def preprocess(path: str) -> List[torch.Tensor]:\n", " \"\"\"Full pipeline: load + window. Convenience wrapper.\"\"\"\n", " waveform = load_audio(path)\n", " return segment_waveform(waveform)\n", "'''\n", "\n", "PATH = '/content/deepfake-audio-detection/src/data/preprocessing.py'\n", "with open(PATH, 'w') as f:\n", " f.write(PREPROCESSING_PY)\n", "\n", "print(f\"Wrote {PATH}\")\n", "print(f\"Size: {len(PREPROCESSING_PY)} bytes\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "HaK7bohZ7k94", "executionInfo": { "status": "ok", "timestamp": 1777460992917, "user_tz": 420, "elapsed": 98, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "86998ee2-03fb-43d5-bca1-d66d6370672f" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Wrote /content/deepfake-audio-detection/src/data/preprocessing.py\n", "Size: 2720 bytes\n" ] } ] }, { "cell_type": "code", "source": [ "import torch\n", "import importlib\n", "import src.data.preprocessing\n", "importlib.reload(src.data.preprocessing)\n", "from src.data.preprocessing import load_audio, segment_waveform, preprocess, WINDOW_SAMPLES, HOP_SAMPLES\n", "\n", "print(f\"Window: {WINDOW_SAMPLES} samples ({WINDOW_SAMPLES/16000:.1f} sec)\")\n", "print(f\"Hop: {HOP_SAMPLES} samples ({HOP_SAMPLES/16000:.1f} sec)\\n\")\n", "\n", "# --- Pick three test clips of different lengths ---\n", "# We already have `splits` from earlier, with `train` utterances loaded\n", "# We'll pick a known short clip, a medium clip, and find a long one\n", "print(\"Finding test clips of varying length...\")\n", "\n", "short_clip = None\n", "medium_clip = None\n", "long_clip = None\n", "\n", "# Sample some clips and bucket them\n", "for u in splits['train'][:200]:\n", " w = load_audio(u.flac_path)\n", " duration = w.shape[0] / 16000\n", " if duration < 2.0 and short_clip is None:\n", " short_clip = (u, w, duration)\n", " elif 3.5 < duration < 4.5 and medium_clip is None:\n", " medium_clip = (u, w, duration)\n", " elif duration > 7.0 and long_clip is None:\n", " long_clip = (u, w, duration)\n", " if all([short_clip, medium_clip, long_clip]):\n", " break\n", "\n", "# --- Test each ---\n", "print(\"\\n\" + \"=\" * 70)\n", "print(\"TEST 1: Short clip (should produce 1 window with padding)\")\n", "print(\"=\" * 70)\n", "if short_clip:\n", " u, w, duration = short_clip\n", " print(f\" File: {u.utterance_id}\")\n", " print(f\" Original duration: {duration:.2f} sec ({w.shape[0]} samples)\")\n", " windows = segment_waveform(w)\n", " print(f\" Windows produced: {len(windows)}\")\n", " print(f\" Each window shape: {windows[0].shape}\")\n", " # Check: real audio at start, zeros at end\n", " n_real = w.shape[0]\n", " print(f\" First {n_real} samples match original: {torch.allclose(windows[0][:n_real], w)}\")\n", " print(f\" Padding zeros at end: {(windows[0][n_real:] == 0).all().item()}\")\n", "else:\n", " print(\" No clip under 2 sec found in first 200 utterances\")\n", "\n", "print(\"\\n\" + \"=\" * 70)\n", "print(\"TEST 2: Medium clip (~4 sec, should produce 1 window)\")\n", "print(\"=\" * 70)\n", "if medium_clip:\n", " u, w, duration = medium_clip\n", " print(f\" File: {u.utterance_id}\")\n", " print(f\" Original duration: {duration:.2f} sec ({w.shape[0]} samples)\")\n", " windows = segment_waveform(w)\n", " print(f\" Windows produced: {len(windows)}\")\n", " print(f\" Each window shape: {windows[0].shape}\")\n", "\n", "print(\"\\n\" + \"=\" * 70)\n", "print(\"TEST 3: Long clip (>7 sec, should produce multiple windows)\")\n", "print(\"=\" * 70)\n", "if long_clip:\n", " u, w, duration = long_clip\n", " print(f\" File: {u.utterance_id}\")\n", " print(f\" Original duration: {duration:.2f} sec ({w.shape[0]} samples)\")\n", " windows = segment_waveform(w)\n", " print(f\" Windows produced: {len(windows)}\")\n", " print(f\" Each window shape: {windows[0].shape}\")\n", " # Predict expected: ceil((n - window) / hop) + 1\n", " expected = max(1, ((w.shape[0] - WINDOW_SAMPLES + HOP_SAMPLES - 1) // HOP_SAMPLES) + 1)\n", " print(f\" Expected windows: {expected}\")\n", "else:\n", " print(\" No clip over 7 sec found in first 200 utterances\")\n", "\n", "print(\"\\n\" + \"=\" * 70)\n", "print(\"Preprocessing tests complete.\")\n", "print(\"=\" * 70)" ], "metadata": { "id": "PSFL3eLM70pR", "executionInfo": { "status": "ok", "timestamp": 1777461163367, "user_tz": 420, "elapsed": 387, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "5dc2f3ce-7fd2-496d-a4d2-75ee12440b26", "colab": { "base_uri": "https://localhost:8080/" } }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Window: 64000 samples (4.0 sec)\n", "Hop: 32000 samples (2.0 sec)\n", "\n", "Finding test clips of varying length...\n", "\n", "======================================================================\n", "TEST 1: Short clip (should produce 1 window with padding)\n", "======================================================================\n", " File: LA_T_9701712\n", " Original duration: 2.00 sec (31966 samples)\n", " Windows produced: 1\n", " Each window shape: torch.Size([64000])\n", " First 31966 samples match original: True\n", " Padding zeros at end: True\n", "\n", "======================================================================\n", "TEST 2: Medium clip (~4 sec, should produce 1 window)\n", "======================================================================\n", " File: LA_T_1271820\n", " Original duration: 4.40 sec (70323 samples)\n", " Windows produced: 2\n", " Each window shape: torch.Size([64000])\n", "\n", "======================================================================\n", "TEST 3: Long clip (>7 sec, should produce multiple windows)\n", "======================================================================\n", " No clip over 7 sec found in first 200 utterances\n", "\n", "======================================================================\n", "Preprocessing tests complete.\n", "======================================================================\n" ] } ] }, { "cell_type": "code", "source": [ "# Search a wider sample to find a clip > 7 seconds\n", "print(\"Searching for a long clip (>7 sec) in first 5,000 utterances...\")\n", "long_clip = None\n", "for u in splits['train'][:5000]:\n", " w = load_audio(u.flac_path)\n", " duration = w.shape[0] / 16000\n", " if duration > 7.0:\n", " long_clip = (u, w, duration)\n", " break\n", "\n", "if long_clip:\n", " u, w, duration = long_clip\n", " print(f\"\\nFound: {u.utterance_id}\")\n", " print(f\"Original duration: {duration:.2f} sec ({w.shape[0]} samples)\")\n", " windows = segment_waveform(w)\n", " print(f\"Windows produced: {len(windows)}\")\n", "\n", " # Show how each window aligns with the original audio\n", " print(f\"\\nWindow alignment:\")\n", " for i, win in enumerate(windows):\n", " start_sample = i * HOP_SAMPLES\n", " end_sample = start_sample + WINDOW_SAMPLES\n", " real_samples = min(WINDOW_SAMPLES, w.shape[0] - start_sample)\n", " padded_samples = WINDOW_SAMPLES - real_samples\n", " start_sec = start_sample / 16000\n", " end_sec = end_sample / 16000\n", " print(f\" Window {i}: covers {start_sec:.2f}-{end_sec:.2f} sec \"\n", " f\"({real_samples} real + {padded_samples} padded samples)\")\n", "else:\n", " print(\"Still no clip > 7 sec. The training set may not contain very long clips.\")\n", " print(\"Skipping test 3 \u2014 preprocessing logic is verified by tests 1 and 2 anyway.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "32VH7t2G9KV1", "executionInfo": { "status": "ok", "timestamp": 1777461410237, "user_tz": 420, "elapsed": 1581, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "22e4f0ec-7869-4e4d-8f47-ef1caa010f5a" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Searching for a long clip (>7 sec) in first 5,000 utterances...\n", "\n", "Found: LA_T_1963829\n", "Original duration: 7.38 sec (118059 samples)\n", "Windows produced: 3\n", "\n", "Window alignment:\n", " Window 0: covers 0.00-4.00 sec (64000 real + 0 padded samples)\n", " Window 1: covers 2.00-6.00 sec (64000 real + 0 padded samples)\n", " Window 2: covers 4.00-8.00 sec (54059 real + 9941 padded samples)\n" ] } ] }, { "cell_type": "code", "source": [ "import shutil\n", "import os\n", "\n", "# Find the notebook in Drive\n", "drive_notebook = '/content/drive/MyDrive/Colab Notebooks/01_data_acquisition.ipynb'\n", "repo_notebook = '/content/deepfake-audio-detection/notebooks/01_data_acquisition.ipynb'\n", "\n", "# Make sure target folder exists\n", "os.makedirs(os.path.dirname(repo_notebook), exist_ok=True)\n", "\n", "# Verify source exists (notebook may have a slightly different name if you renamed it)\n", "if not os.path.exists(drive_notebook):\n", " print(f\"NOT FOUND: {drive_notebook}\")\n", " print(\"\\nLet me list what's actually in Colab Notebooks/ so we can find it:\")\n", " drive_dir = '/content/drive/MyDrive/Colab Notebooks'\n", " if os.path.exists(drive_dir):\n", " for f in sorted(os.listdir(drive_dir)):\n", " print(f\" - {f}\")\n", "else:\n", " shutil.copy(drive_notebook, repo_notebook)\n", " size_kb = os.path.getsize(repo_notebook) / 1024\n", " print(f\"Copied notebook to repo: {repo_notebook}\")\n", " print(f\"Size: {size_kb:.1f} KB\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "UFsKBhh790AY", "executionInfo": { "status": "ok", "timestamp": 1777461579431, "user_tz": 420, "elapsed": 12, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "b460eab3-8d92-4a6e-d015-de112df13738" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "NOT FOUND: /content/drive/MyDrive/Colab Notebooks/01_data_acquisition.ipynb\n", "\n", "Let me list what's actually in Colab Notebooks/ so we can find it:\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "# Search the entire Drive for any .ipynb files\n", "print(\"Searching for .ipynb files in Drive...\")\n", "print(\"(may take 30 seconds on a big Drive)\\n\")\n", "\n", "found = []\n", "for root, dirs, files in os.walk('/content/drive/MyDrive'):\n", " # Skip the dataset folder (lots of files, no notebooks)\n", " if 'deepfake_audio' in root and 'data' in root:\n", " dirs.clear() # don't descend\n", " continue\n", " for f in files:\n", " if f.endswith('.ipynb'):\n", " full = os.path.join(root, f)\n", " size_kb = os.path.getsize(full) / 1024\n", " found.append((full, size_kb))\n", "\n", "if found:\n", " print(f\"Found {len(found)} notebook(s):\\n\")\n", " for path, size in found:\n", " print(f\" [{size:>7.1f} KB] {path}\")\n", "else:\n", " print(\"No .ipynb files found in Drive.\")\n", " print(\"\\nThis means the notebook is only in Colab's session storage,\")\n", " print(\"not actually saved to Drive. We need to fix that.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "v36G75WC99WW", "executionInfo": { "status": "ok", "timestamp": 1777461616748, "user_tz": 420, "elapsed": 25, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "9606056f-6fda-4dba-e867-43b6a9cc4733" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Searching for .ipynb files in Drive...\n", "(may take 30 seconds on a big Drive)\n", "\n", "No .ipynb files found in Drive.\n", "\n", "This means the notebook is only in Colab's session storage,\n", "not actually saved to Drive. We need to fix that.\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "print(\"=== Drive (persists across disconnects) ===\")\n", "drive_la = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "print(f\" Dataset on Drive: {os.path.exists(drive_la)}\")\n", "\n", "print(\"\\n=== Colab local disk (wiped on disconnect) ===\")\n", "repo_dir = '/content/deepfake-audio-detection'\n", "print(f\" Repo cloned: {os.path.exists(repo_dir)}\")\n", "preprocessing_py = f'{repo_dir}/src/data/preprocessing.py'\n", "dataset_py = f'{repo_dir}/src/data/dataset.py'\n", "print(f\" preprocessing.py: {os.path.exists(preprocessing_py)}\")\n", "print(f\" dataset.py: {os.path.exists(dataset_py)}\")\n", "\n", "print(\"\\n=== Python session memory ===\")\n", "try:\n", " print(f\" splits variable: exists, train has {len(splits['train'])} utterances\")\n", "except NameError:\n", " print(f\" splits variable: GONE (need to re-parse)\")\n", "\n", "try:\n", " print(f\" LA_ROOT defined: {LA_ROOT}\")\n", "except NameError:\n", " print(f\" LA_ROOT defined: GONE\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YE1vy_4J-cO6", "executionInfo": { "status": "ok", "timestamp": 1777461780303, "user_tz": 420, "elapsed": 31, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "342059d4-9d62-4934-d705-c0d56616ee1d" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "=== Drive (persists across disconnects) ===\n", " Dataset on Drive: False\n", "\n", "=== Colab local disk (wiped on disconnect) ===\n", " Repo cloned: False\n", " preprocessing.py: False\n", " dataset.py: False\n", "\n", "=== Python session memory ===\n", " splits variable: GONE (need to re-parse)\n", " LA_ROOT defined: GONE\n" ] } ] }, { "cell_type": "code", "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')\n", "\n", "import os\n", "DRIVE_LA = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "print(f\"Dataset on Drive: {os.path.exists(DRIVE_LA)}\")\n", "print(f\"Contents: {sorted(os.listdir(DRIVE_LA))[:3] if os.path.exists(DRIVE_LA) else 'N/A'}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 356 }, "id": "U1D6W1LW-tqv", "executionInfo": { "status": "error", "timestamp": 1777462022163, "user_tz": 420, "elapsed": 121836, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "366fa1e9-3ded-4e1f-8a14-fe00f5eb60e8" }, "execution_count": null, "outputs": [ { "output_type": "error", "ename": "ValueError", "evalue": "mount failed", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/tmp/ipykernel_1225/1916485019.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mgoogle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolab\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdrive\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdrive\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/content/drive'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mDRIVE_LA\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/drive.py\u001b[0m in \u001b[0;36mmount\u001b[0;34m(mountpoint, force_remount, timeout_ms, readonly)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmountpoint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforce_remount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_ms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m120000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreadonly\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;34m\"\"\"Mount your Google Drive at the specified mountpoint path.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m return _mount(\n\u001b[0m\u001b[1;32m 98\u001b[0m \u001b[0mmountpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mforce_remount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mforce_remount\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/drive.py\u001b[0m in \u001b[0;36m_mount\u001b[0;34m(mountpoint, force_remount, timeout_ms, ephemeral, readonly)\u001b[0m\n\u001b[1;32m 270\u001b[0m \u001b[0;34m'https://research.google.com/colaboratory/faq.html#drive-timeout'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 271\u001b[0m )\n\u001b[0;32m--> 272\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'mount failed'\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mextra_reason\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 273\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mcase\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[0;31m# Terminate the DriveFS binary before killing bash.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: mount failed" ] } ] }, { "cell_type": "code", "source": [ "from google.colab import drive\n", "drive.flush_and_unmount()\n", "drive.mount('/content/drive', force_remount=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rw1ejRYA_uCv", "executionInfo": { "status": "ok", "timestamp": 1777462094541, "user_tz": 420, "elapsed": 15902, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "d25977db-0602-41f8-cd06-39cdaed9ca79" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Drive not mounted, so nothing to flush and unmount.\n", "Mounted at /content/drive\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "DRIVE_LA = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "print(f\"Dataset on Drive: {os.path.exists(DRIVE_LA)}\")\n", "if os.path.exists(DRIVE_LA):\n", " print(f\"Contents: {sorted(os.listdir(DRIVE_LA))}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "N2SSP0Rl_9ob", "executionInfo": { "status": "ok", "timestamp": 1777462145481, "user_tz": 420, "elapsed": 2330, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "dee69054-6c48-4ce2-ca92-970681465019" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Dataset on Drive: False\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "# Check what's actually at the root of MyDrive\n", "print(\"=== Top-level of MyDrive ===\")\n", "mydrive = '/content/drive/MyDrive'\n", "if os.path.exists(mydrive):\n", " for item in sorted(os.listdir(mydrive))[:30]:\n", " full = os.path.join(mydrive, item)\n", " kind = \"FOLDER\" if os.path.isdir(full) else \"file\"\n", " print(f\" [{kind}] {item}\")\n", "else:\n", " print(\" MyDrive itself doesn't exist?\")\n", "\n", "# Specifically look for our project folder\n", "print(\"\\n=== Looking for deepfake_audio folder ===\")\n", "target = '/content/drive/MyDrive/deepfake_audio'\n", "print(f\" Exists: {os.path.exists(target)}\")\n", "if os.path.exists(target):\n", " print(f\" Contents:\")\n", " for item in sorted(os.listdir(target)):\n", " print(f\" - {item}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "B6ntBh72AEir", "executionInfo": { "status": "ok", "timestamp": 1777462170703, "user_tz": 420, "elapsed": 8, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "df375125-5161-4c17-c6d3-227a4c6e8fe0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "=== Top-level of MyDrive ===\n", " [file] 0802-python-tutorial.pdf\n", " [file] 1000026170.jpg\n", " [file] 1000026171.jpg\n", " [file] 1000026172.jpg\n", " [file] 1000026173.jpg\n", " [file] 1000026174.jpg\n", " [file] 1000026176.jpg\n", " [file] 1000026178.jpg\n", " [file] 7.png\n", " [file] A Complete Guide to Programming C++.pdf\n", " [file] ADDICTION.pdf\n", " [file] Abnormal Psychology.pdf\n", " [file] Beginning Programming with Python For Dummies Mueller, John Paul [SRG].pdf\n", " [file] Beiratkoza\u0301si Lap Nyomtatva\u0301ny - To\u0308bbnyelvu\u030b new.pdf\n", " [FOLDER] Colab Notebooks\n", " [FOLDER] Colab-Notebooks\n", " [file] DLP-Project.gprj\n", " [file] DS LAB HOME TASKS.docx\n", " [file] DSC_0277.JPG\n", " [file] DSC_0278.JPG\n", " [file] DSC_0279.JPG\n", " [file] DSC_0409.JPG\n", " [file] LECTURE 3.pptx\n", " [file] LECTURE PERSONALITY (1).pdf\n", " [file] Leadership.pdf\n", " [file] Lecture 2 RESEARCH METHODS.pdf\n", " [FOLDER] Mortgage Docs\n", " [file] Name Sara IqbalStudent ID 23K-0669Class CS-5G.pdf\n", " [FOLDER] OOP concept texts\n", " [file] Resume-Sara-Iqbal.pdf\n", "\n", "=== Looking for deepfake_audio folder ===\n", " Exists: True\n", " Contents:\n", " - checkpoints\n", " - data\n", " - logs\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "print(\"=== Drilling into deepfake_audio/data/ ===\")\n", "data = '/content/drive/MyDrive/deepfake_audio/data'\n", "for item in sorted(os.listdir(data)):\n", " full = os.path.join(data, item)\n", " print(f\" {item}/\")\n", " if os.path.isdir(full):\n", " for sub in sorted(os.listdir(full))[:5]:\n", " print(f\" - {sub}\")\n", "\n", "print(\"\\n=== Verifying ASVspoof 2019 LA structure ===\")\n", "LA_ROOT = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "print(f\" Exists: {os.path.exists(LA_ROOT)}\")\n", "if os.path.exists(LA_ROOT):\n", " contents = sorted(os.listdir(LA_ROOT))\n", " print(f\" Contents: {contents}\")\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PDJAjitQAORn", "executionInfo": { "status": "ok", "timestamp": 1777462211674, "user_tz": 420, "elapsed": 21, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "0df13b56-813d-40aa-e388-d22bdf7786bb" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "=== Drilling into deepfake_audio/data/ ===\n", " processed/\n", " raw/\n", " - asvspoof_2019\n", " - asvspoof_2021\n", " - wavefake\n", "\n", "=== Verifying ASVspoof 2019 LA structure ===\n", " Exists: False\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "asvspoof_dir = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019'\n", "print(f\"Path: {asvspoof_dir}\")\n", "print(f\"Exists: {os.path.exists(asvspoof_dir)}\\n\")\n", "\n", "if os.path.exists(asvspoof_dir):\n", " contents = sorted(os.listdir(asvspoof_dir))\n", " print(f\"Found {len(contents)} item(s):\")\n", " for item in contents:\n", " full = os.path.join(asvspoof_dir, item)\n", " kind = \"FOLDER\" if os.path.isdir(full) else \"file\"\n", " try:\n", " size = os.path.getsize(full) if not os.path.isdir(full) else sum(\n", " os.path.getsize(os.path.join(r, f))\n", " for r, d, fs in os.walk(full)\n", " for f in fs\n", " )\n", " size_str = f\"{size/1e9:.2f} GB\" if size > 1e9 else f\"{size/1e6:.2f} MB\" if size > 1e6 else f\"{size:,} B\"\n", " except:\n", " size_str = \"?\"\n", " print(f\" [{kind}] {item} ({size_str})\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "t9IC2d2cAV3Y", "executionInfo": { "status": "ok", "timestamp": 1777462241586, "user_tz": 420, "elapsed": 34, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "44e7f98a-f245-4bf2-9a0f-3427e9c53ebf" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Path: /content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019\n", "Exists: True\n", "\n", "Found 0 item(s):\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "print(\"=== Full state of deepfake_audio/ ===\\n\")\n", "root = '/content/drive/MyDrive/deepfake_audio'\n", "for r, dirs, files in os.walk(root):\n", " indent = ' ' * (r.replace(root, '').count('/'))\n", " print(f\"{indent}{os.path.basename(r) or 'deepfake_audio'}/\")\n", " for f in files:\n", " size_kb = os.path.getsize(os.path.join(r, f)) / 1024\n", " print(f\"{indent} - {f} ({size_kb:.1f} KB)\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KatAj0wbBMpY", "executionInfo": { "status": "ok", "timestamp": 1777462473297, "user_tz": 420, "elapsed": 106, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "5b5bc27c-2e9a-468f-e76b-5d349b49ffba" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "=== Full state of deepfake_audio/ ===\n", "\n", "deepfake_audio/\n", " data/\n", " raw/\n", " asvspoof_2019/\n", " asvspoof_2021/\n", " wavefake/\n", " processed/\n", " checkpoints/\n", " logs/\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "REPO_DIR = '/content/deepfake-audio-detection'\n", "if os.path.exists(REPO_DIR):\n", " import shutil\n", " shutil.rmtree(REPO_DIR)\n", "\n", "!git clone https://github.com/Saracasm/deepfake-audio-detection.git {REPO_DIR}\n", "print(\"\\nRepo re-cloned.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "X2sAQ_tPCKLX", "executionInfo": { "status": "ok", "timestamp": 1777462719759, "user_tz": 420, "elapsed": 1412, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "f5239c80-d472-4090-b901-2b384e1ee1d2" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into '/content/deepfake-audio-detection'...\n", "remote: Enumerating objects: 42, done.\u001b[K\n", "remote: Counting objects: 100% (42/42), done.\u001b[K\n", "remote: Compressing objects: 100% (35/35), done.\u001b[K\n", "remote: Total 42 (delta 13), reused 7 (delta 1), pack-reused 0 (from 0)\u001b[K\n", "Receiving objects: 100% (42/42), 20.42 KiB | 20.42 MiB/s, done.\n", "Resolving deltas: 100% (13/13), done.\n", "\n", "Repo re-cloned.\n" ] } ] }, { "cell_type": "code", "source": [ "PREPROCESSING_PY = '''\"\"\"\n", "Audio preprocessing for ASVspoof 2019 LA.\n", "\n", "Given a .flac path, produces a list of fixed-length 4-second segments at 16kHz\n", "mono, ready to feed into Wav2Vec 2.0.\n", "\n", "Pipeline:\n", " load .flac -> ensure mono -> ensure 16kHz -> window with 50% overlap\n", " (short clips are zero-padded to one full window)\n", "\"\"\"\n", "\n", "from typing import List\n", "import torch\n", "import torchaudio\n", "import torchaudio.functional as F\n", "\n", "\n", "SAMPLE_RATE = 16000\n", "WINDOW_SECONDS = 4.0\n", "OVERLAP_RATIO = 0.5\n", "\n", "WINDOW_SAMPLES = int(SAMPLE_RATE * WINDOW_SECONDS) # 64000\n", "HOP_SAMPLES = int(WINDOW_SAMPLES * (1.0 - OVERLAP_RATIO)) # 32000\n", "\n", "\n", "def load_audio(path: str, target_sr: int = SAMPLE_RATE) -> torch.Tensor:\n", " \"\"\"Load a .flac file and return a 1-D mono waveform at target_sr.\"\"\"\n", " waveform, sr = torchaudio.load(path)\n", " if waveform.shape[0] > 1:\n", " waveform = waveform.mean(dim=0, keepdim=True)\n", " if sr != target_sr:\n", " waveform = F.resample(waveform, orig_freq=sr, new_freq=target_sr)\n", " return waveform.squeeze(0)\n", "\n", "\n", "def segment_waveform(\n", " waveform: torch.Tensor,\n", " window_samples: int = WINDOW_SAMPLES,\n", " hop_samples: int = HOP_SAMPLES,\n", ") -> List[torch.Tensor]:\n", " \"\"\"Split a 1-D waveform into fixed-length windows with overlap.\"\"\"\n", " n = waveform.shape[0]\n", " if n <= window_samples:\n", " padded = torch.zeros(window_samples, dtype=waveform.dtype)\n", " padded[:n] = waveform\n", " return [padded]\n", "\n", " windows = []\n", " start = 0\n", " while start < n:\n", " end = start + window_samples\n", " if end <= n:\n", " windows.append(waveform[start:end])\n", " else:\n", " tail = waveform[start:]\n", " padded = torch.zeros(window_samples, dtype=waveform.dtype)\n", " padded[:tail.shape[0]] = tail\n", " windows.append(padded)\n", " break\n", " start += hop_samples\n", " return windows\n", "\n", "\n", "def preprocess(path: str) -> List[torch.Tensor]:\n", " \"\"\"Full pipeline: load + window.\"\"\"\n", " waveform = load_audio(path)\n", " return segment_waveform(waveform)\n", "'''\n", "\n", "PATH = '/content/deepfake-audio-detection/src/data/preprocessing.py'\n", "os.makedirs(os.path.dirname(PATH), exist_ok=True)\n", "with open(PATH, 'w') as f:\n", " f.write(PREPROCESSING_PY)\n", "print(f\"Wrote {PATH} ({len(PREPROCESSING_PY)} bytes)\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Gtv3tExQCNQ6", "executionInfo": { "status": "ok", "timestamp": 1777462730771, "user_tz": 420, "elapsed": 33, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "7e8ed27a-2c9d-4fa3-8134-d4fa8906ef2f" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Wrote /content/deepfake-audio-detection/src/data/preprocessing.py (1990 bytes)\n" ] } ] }, { "cell_type": "code", "source": [ "DATASET_PY = '''\"\"\"\n", "PyTorch Dataset for ASVspoof 2019 LA.\n", "\n", "One Dataset entry = one 4-second audio window + its binary label.\n", "Long utterances expand into multiple entries (one per window).\n", "\"\"\"\n", "\n", "from typing import List, Tuple\n", "import torch\n", "from torch.utils.data import Dataset\n", "\n", "from src.data.protocols import Utterance\n", "from src.data.preprocessing import (\n", " load_audio,\n", " segment_waveform,\n", " WINDOW_SAMPLES,\n", " HOP_SAMPLES,\n", ")\n", "\n", "\n", "def _count_windows(duration_samples: int) -> int:\n", " if duration_samples <= WINDOW_SAMPLES:\n", " return 1\n", " n = (duration_samples - WINDOW_SAMPLES + HOP_SAMPLES - 1) // HOP_SAMPLES + 1\n", " return max(1, n)\n", "\n", "\n", "class ASVspoofDataset(Dataset):\n", " \"\"\"One sample = one 4-sec window + its label.\"\"\"\n", "\n", " def __init__(\n", " self,\n", " utterances: List[Utterance],\n", " durations_samples: List[int] = None,\n", " ):\n", " self.utterances = utterances\n", " self.index: List[Tuple[int, int]] = []\n", " if durations_samples is None:\n", " for i in range(len(utterances)):\n", " self.index.append((i, 0))\n", " else:\n", " assert len(durations_samples) == len(utterances)\n", " for i, d in enumerate(durations_samples):\n", " n_windows = _count_windows(d)\n", " for w in range(n_windows):\n", " self.index.append((i, w))\n", "\n", " def __len__(self) -> int:\n", " return len(self.index)\n", "\n", " def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int, str]:\n", " utt_idx, window_idx = self.index[idx]\n", " utt = self.utterances[utt_idx]\n", " waveform = load_audio(utt.flac_path)\n", " windows = segment_waveform(waveform)\n", " window_idx = min(window_idx, len(windows) - 1)\n", " return windows[window_idx], utt.label_int, utt.utterance_id\n", "'''\n", "\n", "PATH = '/content/deepfake-audio-detection/src/data/dataset.py'\n", "with open(PATH, 'w') as f:\n", " f.write(DATASET_PY)\n", "print(f\"Wrote {PATH} ({len(DATASET_PY)} bytes)\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "We2rPQJgCPAx", "executionInfo": { "status": "ok", "timestamp": 1777462744250, "user_tz": 420, "elapsed": 28, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "06a37180-3a81-473d-de8f-ada8241bb30a" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Wrote /content/deepfake-audio-detection/src/data/dataset.py (1741 bytes)\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "files = [\n", " '/content/deepfake-audio-detection/src/data/protocols.py',\n", " '/content/deepfake-audio-detection/src/data/preprocessing.py',\n", " '/content/deepfake-audio-detection/src/data/dataset.py',\n", "]\n", "for f in files:\n", " exists = os.path.exists(f)\n", " size = os.path.getsize(f) if exists else 0\n", " print(f\" {'OK' if exists else 'MISSING'} {f} ({size} bytes)\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "whMHGvPZCTif", "executionInfo": { "status": "ok", "timestamp": 1777462756455, "user_tz": 420, "elapsed": 26, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "25a7d3a0-167f-4b8e-9c5b-61cd738e0256" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " OK /content/deepfake-audio-detection/src/data/protocols.py (3284 bytes)\n", " OK /content/deepfake-audio-detection/src/data/preprocessing.py (1990 bytes)\n", " OK /content/deepfake-audio-detection/src/data/dataset.py (1741 bytes)\n" ] } ] }, { "cell_type": "code", "source": [ "from google.colab import files\n", "print(\"Click 'Choose Files' and select kaggle.json from your Downloads folder\")\n", "uploaded = files.upload()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 90 }, "id": "oKo2zIQtCZ70", "executionInfo": { "status": "ok", "timestamp": 1777462789181, "user_tz": 420, "elapsed": 6454, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "62d741ef-f3af-4230-e02a-c90db66c49c4" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Click 'Choose Files' and select kaggle.json from your Downloads folder\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. Please rerun this cell to enable.\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Saving kaggle.json to kaggle.json\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "import shutil\n", "\n", "os.makedirs('/root/.kaggle', exist_ok=True)\n", "shutil.move('kaggle.json', '/root/.kaggle/kaggle.json')\n", "os.chmod('/root/.kaggle/kaggle.json', 0o600)\n", "\n", "!pip install -q kaggle\n", "print(\"Kaggle CLI ready.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "0_M0IlMaCffT", "executionInfo": { "status": "ok", "timestamp": 1777462812631, "user_tz": 420, "elapsed": 7169, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "402c3aa2-f5ba-424d-f4bf-49b3fa726d1a" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Kaggle CLI ready.\n" ] } ] }, { "cell_type": "code", "source": [ "import os, time\n", "\n", "DOWNLOAD_DIR = '/content/kaggle_download'\n", "os.makedirs(DOWNLOAD_DIR, exist_ok=True)\n", "\n", "print(\"Downloading ASVspoof 2019 LA from Kaggle...\")\n", "print(\"Expected: ~7.6 GB, ~3-5 minutes\\n\")\n", "start = time.time()\n", "\n", "!kaggle datasets download -d anishsarkar22/asvpoof-2019-dataset-la -p {DOWNLOAD_DIR} --unzip --force\n", "\n", "elapsed_min = (time.time() - start) / 60\n", "print(f\"\\nDownload+unzip done in {elapsed_min:.1f} minutes.\")\n", "\n", "# Verify on local disk\n", "local_la = f'{DOWNLOAD_DIR}/LA'\n", "print(f\"\\nLocal LA exists: {os.path.exists(local_la)}\")\n", "if os.path.exists(local_la):\n", " print(f\"Contents: {sorted(os.listdir(local_la))}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "UvSNdUIvCjuj", "executionInfo": { "status": "ok", "timestamp": 1777463338260, "user_tz": 420, "elapsed": 515529, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "189c7e94-ebb4-48f0-af93-d28e7b5d64ec" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Downloading ASVspoof 2019 LA from Kaggle...\n", "Expected: ~7.6 GB, ~3-5 minutes\n", "\n", "Dataset URL: https://www.kaggle.com/datasets/anishsarkar22/asvpoof-2019-dataset-la\n", "License(s): ODC Attribution License (ODC-By)\n", "Downloading asvpoof-2019-dataset-la.zip to /content/kaggle_download\n", "100% 7.12G/7.12G [07:12<00:00, 17.7MB/s]\n", "\n", "\n", "Download+unzip done in 8.6 minutes.\n", "\n", "Local LA exists: True\n", "Contents: ['ASVspoof2019_LA_asv_protocols', 'ASVspoof2019_LA_asv_scores', 'ASVspoof2019_LA_cm_protocols', 'ASVspoof2019_LA_dev', 'ASVspoof2019_LA_eval', 'ASVspoof2019_LA_train', 'README.LA.txt']\n" ] } ] }, { "cell_type": "code", "source": [ "import os, time\n", "\n", "SRC = '/content/kaggle_download/LA'\n", "DST = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "\n", "# Make sure parent exists\n", "os.makedirs(os.path.dirname(DST), exist_ok=True)\n", "\n", "# Remove any partial DST from before\n", "if os.path.exists(DST):\n", " print(f\"Cleaning empty/stale {DST}...\")\n", " import shutil\n", " shutil.rmtree(DST)\n", "\n", "print(f\"Copying {SRC} -> {DST}\")\n", "print(\"This time we COPY (not move) so we keep a local backup until Drive verification passes.\\n\")\n", "\n", "start = time.time()\n", "!cp -r {SRC} {DST}\n", "elapsed = (time.time() - start) / 60\n", "print(f\"\\nLocal copy done in {elapsed:.1f} minutes.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Q5vCXMv3E4T9", "executionInfo": { "status": "ok", "timestamp": 1777467828281, "user_tz": 420, "elapsed": 3549729, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "c2ac47a7-c0b2-44d7-9a12-668575601581" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cleaning empty/stale /content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA...\n", "Copying /content/kaggle_download/LA -> /content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA\n", "This time we COPY (not move) so we keep a local backup until Drive verification passes.\n", "\n", "\n", "Local copy done in 52.6 minutes.\n" ] } ] }, { "cell_type": "code", "source": [ "import os, glob\n", "\n", "DST = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "\n", "print(f\"Drive copy at: {DST}\")\n", "print(f\"Exists: {os.path.exists(DST)}\\n\")\n", "\n", "if os.path.exists(DST):\n", " contents = sorted(os.listdir(DST))\n", " print(f\"Top-level ({len(contents)} items): {contents}\\n\")\n", "\n", " expected = {'train': 25380, 'dev': 24986, 'eval': 71933}\n", " all_ok = True\n", " for p, n_expected in expected.items():\n", " flac_dir = os.path.join(DST, f'ASVspoof2019_LA_{p}', 'flac')\n", " if os.path.exists(flac_dir):\n", " n = len(glob.glob(os.path.join(flac_dir, '*.flac')))\n", " status = \"OK\" if n == n_expected else f\"SHORT (expected {n_expected})\"\n", " if n != n_expected:\n", " all_ok = False\n", " print(f\" [{status}] {p}: {n:,} / {n_expected:,} files\")\n", " else:\n", " print(f\" [MISSING] {p}\")\n", " all_ok = False\n", "\n", " print(f\"\\nProtocol files:\")\n", " proto_dir = os.path.join(DST, 'ASVspoof2019_LA_cm_protocols')\n", " if os.path.exists(proto_dir):\n", " for f in sorted(os.listdir(proto_dir)):\n", " print(f\" - {f}\")\n", " else:\n", " all_ok = False\n", "\n", " print(f\"\\nOverall: {'ALL FILES PRESENT' if all_ok else 'INCOMPLETE'}\")\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "baFLNQjmWW1z", "executionInfo": { "status": "ok", "timestamp": 1777468023153, "user_tz": 420, "elapsed": 7895, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "ff4ca10e-0703-4141-ebc2-b9dd025d2218" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Drive copy at: /content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA\n", "Exists: True\n", "\n", "Top-level (7 items): ['ASVspoof2019_LA_asv_protocols', 'ASVspoof2019_LA_asv_scores', 'ASVspoof2019_LA_cm_protocols', 'ASVspoof2019_LA_dev', 'ASVspoof2019_LA_eval', 'ASVspoof2019_LA_train', 'README.LA.txt']\n", "\n", " [OK] train: 25,380 / 25,380 files\n", " [OK] dev: 24,986 / 24,986 files\n", " [OK] eval: 71,933 / 71,933 files\n", "\n", "Protocol files:\n", " - ASVspoof2019.LA.cm.dev.trl.txt\n", " - ASVspoof2019.LA.cm.eval.trl.txt\n", " - ASVspoof2019.LA.cm.train.trn.txt\n", "\n", "Overall: ALL FILES PRESENT\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "from google.colab import drive\n", "\n", "# If something's already mounted, force-remount cleanly\n", "try:\n", " drive.flush_and_unmount()\n", "except:\n", " pass\n", "\n", "drive.mount('/content/drive', force_remount=True)\n", "\n", "# Now verify\n", "DST = '/content/drive/MyDrive/deepfake_audio/data/raw/asvspoof_2019/LA'\n", "print(f\"\\nDST exists: {os.path.exists(DST)}\")\n", "\n", "if os.path.exists(DST):\n", " import glob\n", " expected = {'train': 25380, 'dev': 24986, 'eval': 71933}\n", " all_ok = True\n", " for p, n_expected in expected.items():\n", " flac_dir = os.path.join(DST, f'ASVspoof2019_LA_{p}', 'flac')\n", " n = len(glob.glob(os.path.join(flac_dir, '*.flac'))) if os.path.exists(flac_dir) else 0\n", " status = \"OK\" if n == n_expected else \"SHORT\"\n", " if n != n_expected:\n", " all_ok = False\n", " print(f\" [{status}] {p}: {n:,} / {n_expected:,}\")\n", " print(f\"\\nVerdict: {'PERSISTED CORRECTLY' if all_ok else 'DATA LOST AGAIN'}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5oOsg3U5bdNi", "executionInfo": { "status": "ok", "timestamp": 1777486264222, "user_tz": 420, "elapsed": 84876, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "115cef17-5f7c-437c-c699-307de5fa0c9f" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Drive not mounted, so nothing to flush and unmount.\n", "Mounted at /content/drive\n", "\n", "DST exists: True\n", " [SHORT] train: 0 / 25,380\n", " [SHORT] dev: 11,009 / 24,986\n", " [SHORT] eval: 0 / 71,933\n", "\n", "Verdict: DATA LOST AGAIN\n" ] } ] }, { "cell_type": "code", "source": [ "import os, glob\n", "\n", "LOCAL_LA = '/content/kaggle_download/LA'\n", "print(f\"Local copy at: {LOCAL_LA}\")\n", "print(f\"Exists: {os.path.exists(LOCAL_LA)}\\n\")\n", "\n", "if os.path.exists(LOCAL_LA):\n", " expected = {'train': 25380, 'dev': 24986, 'eval': 71933}\n", " all_ok = True\n", " for p, n_expected in expected.items():\n", " flac_dir = os.path.join(LOCAL_LA, f'ASVspoof2019_LA_{p}', 'flac')\n", " n = len(glob.glob(os.path.join(flac_dir, '*.flac'))) if os.path.exists(flac_dir) else 0\n", " status = \"OK\" if n == n_expected else \"SHORT\"\n", " if n != n_expected:\n", " all_ok = False\n", " print(f\" [{status}] {p}: {n:,} / {n_expected:,}\")\n", " print(f\"\\nLocal copy: {'INTACT' if all_ok else 'INCOMPLETE'}\")\n", "else:\n", " print(\"Local copy is gone too \u2014 Colab disk was wiped on disconnect.\")\n", " print(\"We'll need to re-download.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "qckuWNI6cYrS", "executionInfo": { "status": "ok", "timestamp": 1777486371194, "user_tz": 420, "elapsed": 22, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "eb94d511-c483-40ee-9f01-864b90161003" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Local copy at: /content/kaggle_download/LA\n", "Exists: False\n", "\n", "Local copy is gone too \u2014 Colab disk was wiped on disconnect.\n", "We'll need to re-download.\n" ] } ] }, { "cell_type": "code", "source": [ "from google.colab import files\n", "print(\"Re-uploading kaggle.json...\")\n", "uploaded = files.upload()\n", "\n", "import os, shutil\n", "os.makedirs('/root/.kaggle', exist_ok=True)\n", "shutil.move('kaggle.json', '/root/.kaggle/kaggle.json')\n", "os.chmod('/root/.kaggle/kaggle.json', 0o600)\n", "print(\"kaggle.json configured.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 107 }, "id": "RQ0T1sJDfiBU", "executionInfo": { "status": "ok", "timestamp": 1777487202546, "user_tz": 420, "elapsed": 7173, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "ecd4eb87-1d31-4128-e97a-a1b0953e2866" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Re-uploading kaggle.json...\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. Please rerun this cell to enable.\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Saving kaggle.json to kaggle.json\n", "kaggle.json configured.\n" ] } ] }, { "cell_type": "code", "source": [ "\"\"\"\n", "SESSION BOOTSTRAP\n", "Run this once at the start of every Colab session.\n", "Restores: Drive mount, dataset on local disk, repo clone, Python imports.\n", "\"\"\"\n", "import os, sys, time\n", "\n", "# ----- 1. Mount Drive (only used for checkpoints, NOT dataset) -----\n", "print(\"=\" * 60)\n", "print(\"Step 1/4: Mount Drive\")\n", "print(\"=\" * 60)\n", "DRIVE_ROOT = '/content/drive/MyDrive'\n", "if not os.path.exists(DRIVE_ROOT):\n", " from google.colab import drive\n", " drive.mount('/content/drive')\n", "print(\"Drive mounted.\\n\")\n", "\n", "# Make sure checkpoint folder exists\n", "os.makedirs('/content/drive/MyDrive/deepfake_audio/checkpoints', exist_ok=True)\n", "os.makedirs('/content/drive/MyDrive/deepfake_audio/logs', exist_ok=True)\n", "\n", "\n", "# ----- 2. Clone repo to local disk -----\n", "print(\"=\" * 60)\n", "print(\"Step 2/4: Clone repo\")\n", "print(\"=\" * 60)\n", "REPO_DIR = '/content/deepfake-audio-detection'\n", "if not os.path.exists(REPO_DIR):\n", " !git clone https://github.com/Saracasm/deepfake-audio-detection.git {REPO_DIR}\n", "else:\n", " !cd {REPO_DIR} && git pull --quiet\n", "print(f\"Repo at: {REPO_DIR}\\n\")\n", "\n", "\n", "# ----- 3. Re-download dataset to local disk -----\n", "print(\"=\" * 60)\n", "print(\"Step 3/4: Re-download dataset (~5-8 min)\")\n", "print(\"=\" * 60)\n", "LOCAL_LA = '/content/kaggle_download/LA'\n", "\n", "if os.path.exists(LOCAL_LA):\n", " print(\"Dataset already present on local disk \u2014 skipping download.\")\n", "else:\n", " # Need kaggle.json for this \u2014 assume it's been uploaded to /root/.kaggle\n", " if not os.path.exists('/root/.kaggle/kaggle.json'):\n", " print(\"ERROR: kaggle.json not configured.\")\n", " print(\"Run the kaggle.json upload cell first, then re-run this.\")\n", " raise SystemExit(\"Need kaggle credentials\")\n", "\n", " !pip install -q kaggle\n", " os.makedirs('/content/kaggle_download', exist_ok=True)\n", "\n", " start = time.time()\n", " !kaggle datasets download -d anishsarkar22/asvpoof-2019-dataset-la \\\n", " -p /content/kaggle_download --unzip --force --quiet\n", " print(f\"Downloaded in {(time.time()-start)/60:.1f} minutes.\")\n", "\n", "print(f\"Dataset at: {LOCAL_LA}\\n\")\n", "\n", "\n", "# ----- 4. Set up Python imports -----\n", "print(\"=\" * 60)\n", "print(\"Step 4/4: Set up Python imports\")\n", "print(\"=\" * 60)\n", "sys.path.insert(0, REPO_DIR)\n", "\n", "# Define key paths used throughout the notebook\n", "LA_ROOT = LOCAL_LA # IMPORTANT: dataset lives on LOCAL disk now, not Drive\n", "print(f\"LA_ROOT = {LA_ROOT}\")\n", "print(f\"REPO_DIR = {REPO_DIR}\")\n", "print(\"\\nBootstrap complete. Ready to work.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ASYdCt45flTc", "executionInfo": { "status": "ok", "timestamp": 1777487423786, "user_tz": 420, "elapsed": 211230, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "d47a1380-b3fa-4dcc-9701-e24e40173ee2" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "============================================================\n", "Step 1/4: Mount Drive\n", "============================================================\n", "Drive mounted.\n", "\n", "============================================================\n", "Step 2/4: Clone repo\n", "============================================================\n", "Cloning into '/content/deepfake-audio-detection'...\n", "remote: Enumerating objects: 42, done.\u001b[K\n", "remote: Counting objects: 100% (42/42), done.\u001b[K\n", "remote: Compressing objects: 100% (35/35), done.\u001b[K\n", "Receiving objects: 100% (42/42), 20.42 KiB | 3.40 MiB/s, done.\n", "remote: Total 42 (delta 13), reused 7 (delta 1), pack-reused 0 (from 0)\u001b[K\n", "Resolving deltas: 100% (13/13), done.\n", "Repo at: /content/deepfake-audio-detection\n", "\n", "============================================================\n", "Step 3/4: Re-download dataset (~5-8 min)\n", "============================================================\n", "Dataset URL: https://www.kaggle.com/datasets/anishsarkar22/asvpoof-2019-dataset-la\n", "License(s): ODC Attribution License (ODC-By)\n", "Downloaded in 3.4 minutes.\n", "Dataset at: /content/kaggle_download/LA\n", "\n", "============================================================\n", "Step 4/4: Set up Python imports\n", "============================================================\n", "LA_ROOT = /content/kaggle_download/LA\n", "REPO_DIR = /content/deepfake-audio-detection\n", "\n", "Bootstrap complete. Ready to work.\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "REPO_DIR = '/content/deepfake-audio-detection'\n", "files_to_check = [\n", " 'src/data/protocols.py',\n", " 'src/data/preprocessing.py',\n", " 'src/data/dataset.py',\n", "]\n", "for f in files_to_check:\n", " full = os.path.join(REPO_DIR, f)\n", " exists = os.path.exists(full)\n", " size = os.path.getsize(full) if exists else 0\n", " print(f\" [{'OK' if exists else 'MISSING'}] {f} ({size} bytes)\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "T-jcxmpZg4sJ", "executionInfo": { "status": "ok", "timestamp": 1777487549786, "user_tz": 420, "elapsed": 34, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "be96b914-5f22-44d2-b4af-59c7b2826b8b" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " [OK] src/data/protocols.py (3284 bytes)\n", " [MISSING] src/data/preprocessing.py (0 bytes)\n", " [MISSING] src/data/dataset.py (0 bytes)\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "\n", "# ----- preprocessing.py -----\n", "PREPROCESSING_PY = '''\"\"\"\n", "Audio preprocessing for ASVspoof 2019 LA.\n", "\n", "Given a .flac path, produces a list of fixed-length 4-second segments at 16kHz\n", "mono, ready to feed into Wav2Vec 2.0.\n", "\n", "Pipeline:\n", " load .flac -> ensure mono -> ensure 16kHz -> window with 50% overlap\n", " (short clips are zero-padded to one full window)\n", "\"\"\"\n", "\n", "from typing import List\n", "import torch\n", "import torchaudio\n", "import torchaudio.functional as F\n", "\n", "\n", "SAMPLE_RATE = 16000\n", "WINDOW_SECONDS = 4.0\n", "OVERLAP_RATIO = 0.5\n", "\n", "WINDOW_SAMPLES = int(SAMPLE_RATE * WINDOW_SECONDS) # 64000\n", "HOP_SAMPLES = int(WINDOW_SAMPLES * (1.0 - OVERLAP_RATIO)) # 32000\n", "\n", "\n", "def load_audio(path: str, target_sr: int = SAMPLE_RATE) -> torch.Tensor:\n", " \"\"\"Load a .flac file and return a 1-D mono waveform at target_sr.\"\"\"\n", " waveform, sr = torchaudio.load(path)\n", " if waveform.shape[0] > 1:\n", " waveform = waveform.mean(dim=0, keepdim=True)\n", " if sr != target_sr:\n", " waveform = F.resample(waveform, orig_freq=sr, new_freq=target_sr)\n", " return waveform.squeeze(0)\n", "\n", "\n", "def segment_waveform(\n", " waveform: torch.Tensor,\n", " window_samples: int = WINDOW_SAMPLES,\n", " hop_samples: int = HOP_SAMPLES,\n", ") -> List[torch.Tensor]:\n", " \"\"\"Split a 1-D waveform into fixed-length windows with overlap.\"\"\"\n", " n = waveform.shape[0]\n", " if n <= window_samples:\n", " padded = torch.zeros(window_samples, dtype=waveform.dtype)\n", " padded[:n] = waveform\n", " return [padded]\n", "\n", " windows = []\n", " start = 0\n", " while start < n:\n", " end = start + window_samples\n", " if end <= n:\n", " windows.append(waveform[start:end])\n", " else:\n", " tail = waveform[start:]\n", " padded = torch.zeros(window_samples, dtype=waveform.dtype)\n", " padded[:tail.shape[0]] = tail\n", " windows.append(padded)\n", " break\n", " start += hop_samples\n", " return windows\n", "\n", "\n", "def preprocess(path: str) -> List[torch.Tensor]:\n", " \"\"\"Full pipeline: load + window.\"\"\"\n", " waveform = load_audio(path)\n", " return segment_waveform(waveform)\n", "'''\n", "\n", "# ----- dataset.py -----\n", "DATASET_PY = '''\"\"\"\n", "PyTorch Dataset for ASVspoof 2019 LA.\n", "\n", "One Dataset entry = one 4-second audio window + its binary label.\n", "Long utterances expand into multiple entries (one per window).\n", "\"\"\"\n", "\n", "from typing import List, Tuple\n", "import torch\n", "from torch.utils.data import Dataset\n", "\n", "from src.data.protocols import Utterance\n", "from src.data.preprocessing import (\n", " load_audio,\n", " segment_waveform,\n", " WINDOW_SAMPLES,\n", " HOP_SAMPLES,\n", ")\n", "\n", "\n", "def _count_windows(duration_samples: int) -> int:\n", " if duration_samples <= WINDOW_SAMPLES:\n", " return 1\n", " n = (duration_samples - WINDOW_SAMPLES + HOP_SAMPLES - 1) // HOP_SAMPLES + 1\n", " return max(1, n)\n", "\n", "\n", "class ASVspoofDataset(Dataset):\n", " \"\"\"One sample = one 4-sec window + its label.\"\"\"\n", "\n", " def __init__(\n", " self,\n", " utterances: List[Utterance],\n", " durations_samples: List[int] = None,\n", " ):\n", " self.utterances = utterances\n", " self.index: List[Tuple[int, int]] = []\n", " if durations_samples is None:\n", " for i in range(len(utterances)):\n", " self.index.append((i, 0))\n", " else:\n", " assert len(durations_samples) == len(utterances)\n", " for i, d in enumerate(durations_samples):\n", " n_windows = _count_windows(d)\n", " for w in range(n_windows):\n", " self.index.append((i, w))\n", "\n", " def __len__(self) -> int:\n", " return len(self.index)\n", "\n", " def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int, str]:\n", " utt_idx, window_idx = self.index[idx]\n", " utt = self.utterances[utt_idx]\n", " waveform = load_audio(utt.flac_path)\n", " windows = segment_waveform(waveform)\n", " window_idx = min(window_idx, len(windows) - 1)\n", " return windows[window_idx], utt.label_int, utt.utterance_id\n", "'''\n", "\n", "# Write both files\n", "files_to_write = {\n", " '/content/deepfake-audio-detection/src/data/preprocessing.py': PREPROCESSING_PY,\n", " '/content/deepfake-audio-detection/src/data/dataset.py': DATASET_PY,\n", "}\n", "\n", "for path, content in files_to_write.items():\n", " os.makedirs(os.path.dirname(path), exist_ok=True)\n", " with open(path, 'w') as f:\n", " f.write(content)\n", " print(f\"Wrote {path} ({len(content)} bytes)\")\n", "\n", "# Verify\n", "print(\"\\nFinal verification:\")\n", "for f in [\n", " 'src/data/protocols.py',\n", " 'src/data/preprocessing.py',\n", " 'src/data/dataset.py',\n", "]:\n", " full = os.path.join('/content/deepfake-audio-detection', f)\n", " size = os.path.getsize(full) if os.path.exists(full) else 0\n", " print(f\" [{'OK' if size > 0 else 'MISSING'}] {f} ({size} bytes)\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "CfYkuSC0hNx3", "executionInfo": { "status": "ok", "timestamp": 1777487636858, "user_tz": 420, "elapsed": 175, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "2f6ae2b5-0449-4ff0-f633-e547cc134a32" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Wrote /content/deepfake-audio-detection/src/data/preprocessing.py (1990 bytes)\n", "Wrote /content/deepfake-audio-detection/src/data/dataset.py (1741 bytes)\n", "\n", "Final verification:\n", " [OK] src/data/protocols.py (3284 bytes)\n", " [OK] src/data/preprocessing.py (1990 bytes)\n", " [OK] src/data/dataset.py (1741 bytes)\n" ] } ] }, { "cell_type": "code", "source": [ "from google.colab import userdata\n", "import os\n", "\n", "# Get token from Secrets\n", "GITHUB_TOKEN = userdata.get('GITHUB_TOKEN')\n", "GITHUB_USER = \"Saracasm\"\n", "REPO = \"deepfake-audio-detection\"\n", "\n", "REPO_DIR = '/content/deepfake-audio-detection'\n", "os.chdir(REPO_DIR)\n", "\n", "# Configure git identity (needs to be re-set after each repo re-clone)\n", "!git config user.email \"95262824+Saracasm@users.noreply.github.com\"\n", "!git config user.name \"Sara Iqbal\"\n", "\n", "# Stage the new files\n", "!git add src/data/preprocessing.py src/data/dataset.py\n", "!git status\n", "\n", "# Commit\n", "!git commit -m \"Phase 2: add preprocessing and dataset modules\"\n", "\n", "# Push using token from secrets\n", "push_url = f\"https://{GITHUB_USER}:{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{REPO}.git\"\n", "!git push {push_url} main 2>&1 | grep -v {GITHUB_TOKEN}" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "H2pMetC5hq2K", "executionInfo": { "status": "ok", "timestamp": 1777487758527, "user_tz": 420, "elapsed": 2943, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "be5dc684-f4fe-410b-e74e-6a2d4eb4a81a" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "On branch main\n", "Your branch is up to date with 'origin/main'.\n", "\n", "Changes to be committed:\n", " (use \"git restore --staged ...\" to unstage)\n", "\t\u001b[32mnew file: src/data/dataset.py\u001b[m\n", "\t\u001b[32mnew file: src/data/preprocessing.py\u001b[m\n", "\n", "[main 1274341] Phase 2: add preprocessing and dataset modules\n", " 2 files changed, 124 insertions(+)\n", " create mode 100644 src/data/dataset.py\n", " create mode 100644 src/data/preprocessing.py\n", "To https://github.com/Saracasm/deepfake-audio-detection.git\n", " a14fd25..1274341 main -> main\n" ] } ] }, { "cell_type": "code", "source": [ "# from google.colab import userdata\n", "# import os, shutil, glob\n", "\n", "# GITHUB_TOKEN = userdata.get('GITHUB_TOKEN')\n", "# GITHUB_USER = \"Saracasm\"\n", "# REPO = \"deepfake-audio-detection\"\n", "# REPO_DIR = '/content/deepfake-audio-detection'\n", "\n", "# # Find the notebook in Drive (search for any 01_*.ipynb file)\n", "# print(\"Searching for the notebook in Drive...\")\n", "# candidates = []\n", "# for root, dirs, files in os.walk('/content/drive/MyDrive'):\n", "# if 'deepfake_audio' in root and 'data' in root:\n", "# dirs.clear()\n", "# continue\n", "# for f in files:\n", "# if f.endswith('.ipynb') and ('01' in f or 'data_acquisition' in f.lower()):\n", "# full = os.path.join(root, f)\n", "# candidates.append((full, os.path.getsize(full)))\n", "\n", "# if not candidates:\n", "# print(\"\\nNo notebook found. You need to first do File -> Save in Colab.\")\n", "# print(\"Then re-run this cell.\")\n", "# else:\n", "# print(f\"\\nFound {len(candidates)} candidate(s):\")\n", "# for path, size in candidates:\n", "# print(f\" [{size/1024:.1f} KB] {path}\")\n", "\n", "# # Pick the largest (most recently saved with most content, usually)\n", "# best = max(candidates, key=lambda x: x[1])\n", "# src_path = best[0]\n", "# print(f\"\\nUsing: {src_path}\")\n", "\n", "# # Copy to repo\n", "# target = f\"{REPO_DIR}/notebooks/01_data_acquisition.ipynb\"\n", "# os.makedirs(os.path.dirname(target), exist_ok=True)\n", "# shutil.copy(src_path, target)\n", "# print(f\"Copied to: {target}\")\n", "\n", "# # Commit and push\n", "# os.chdir(REPO_DIR)\n", "# !git add notebooks/01_data_acquisition.ipynb\n", "# !git commit -m \"Phase 2: add data acquisition notebook\"\n", "\n", "# push_url = f\"https://{GITHUB_USER}:{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{REPO}.git\"\n", "# !git push {push_url} main 2>&1 | grep -v {GITHUB_TOKEN}" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pzIQs8dmjAiN", "executionInfo": { "status": "ok", "timestamp": 1777488118735, "user_tz": 420, "elapsed": 12064, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "1ad8a85b-df97-42a2-cd53-8aa377c8be84" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Searching for the notebook in Drive...\n", "\n", "Found 2 candidate(s):\n", " [296.7 KB] /content/drive/MyDrive/Colab Notebooks/01_data_acquisition.ipynb\n", " [354.5 KB] /content/drive/MyDrive/Colab Notebooks/Copy of 01_data_acquisition.ipynb\n", "\n", "Using: /content/drive/MyDrive/Colab Notebooks/Copy of 01_data_acquisition.ipynb\n", "Copied to: /content/deepfake-audio-detection/notebooks/01_data_acquisition.ipynb\n", "[main 015ead7] Phase 2: add data acquisition notebook\n", " 1 file changed, 1 insertion(+)\n", " create mode 100644 notebooks/01_data_acquisition.ipynb\n", "remote: error: GH013: Repository rule violations found for refs/heads/main. \n", "remote: \n", "remote: - GITHUB PUSH PROTECTION \n", "remote: \u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014 \n", "remote: Resolve the following violations before pushing again \n", "remote: \n", "remote: - Push cannot contain secrets \n", "remote: \n", "remote: \n", "remote: (?) Learn how to resolve a blocked push \n", "remote: https://docs.github.com/code-security/secret-scanning/working-with-secret-scanning-and-push-protection/working-with-push-protection-from-the-command-line#resolving-a-blocked-push \n", "remote: \n", "remote: (?) This repository does not have Secret Scanning enabled, but is eligible. Enable Secret Scanning to view and manage detected secrets. \n", "remote: Visit the repository settings page, https://github.com/Saracasm/deepfake-audio-detection/settings/security_analysis \n", "remote: \n", "remote: \n", "remote: \u2014\u2014 GitHub Personal Access Token \u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014 \n", "remote: locations: \n", "remote: - commit: 015ead78f7a250222f0b2a2c1ec217c5f0b7e48f \n", "remote: path: notebooks/01_data_acquisition.ipynb:1 \n", "remote: - commit: 015ead78f7a250222f0b2a2c1ec217c5f0b7e48f \n", "remote: path: notebooks/01_data_acquisition.ipynb:1 \n", "remote: \n", "remote: (?) To push, remove secret from commit(s) or follow this URL to allow the secret. \n", "remote: https://github.com/Saracasm/deepfake-audio-detection/security/secret-scanning/unblock-secret/3D2lbumZMuwSyxKWNc9mCnXElLu \n", "remote: \n", "remote: \n", "remote: \n", "To https://github.com/Saracasm/deepfake-audio-detection.git\n", " ! [remote rejected] main -> main (push declined due to repository rule violations)\n", "error: failed to push some refs to 'https://github.com/Saracasm/deepfake-audio-detection.git'\n" ] } ] }, { "cell_type": "code", "source": [ "import json, re\n", "\n", "NOTEBOOK = '/content/deepfake-audio-detection/notebooks/01_data_acquisition.ipynb'\n", "\n", "with open(NOTEBOOK, 'r') as f:\n", " nb = json.load(f)\n", "\n", "# Look for any token strings in cell sources and outputs\n", "TOKEN_PATTERN = re.compile(r'ghp_[A-Za-z0-9]{36}')\n", "n_replaced = 0\n", "\n", "for cell in nb.get('cells', []):\n", " # Clean source\n", " if 'source' in cell:\n", " if isinstance(cell['source'], list):\n", " for i, line in enumerate(cell['source']):\n", " if TOKEN_PATTERN.search(line):\n", " cell['source'][i] = TOKEN_PATTERN.sub('REDACTED_TOKEN', line)\n", " n_replaced += 1\n", " elif isinstance(cell['source'], str):\n", " if TOKEN_PATTERN.search(cell['source']):\n", " cell['source'] = TOKEN_PATTERN.sub('REDACTED_TOKEN', cell['source'])\n", " n_replaced += 1\n", "\n", " # Clean outputs (in case it appeared in command output too)\n", " for output in cell.get('outputs', []):\n", " if 'text' in output:\n", " if isinstance(output['text'], list):\n", " for i, line in enumerate(output['text']):\n", " if TOKEN_PATTERN.search(line):\n", " output['text'][i] = TOKEN_PATTERN.sub('REDACTED_TOKEN', line)\n", " n_replaced += 1\n", " elif isinstance(output['text'], str):\n", " if TOKEN_PATTERN.search(output['text']):\n", " output['text'] = TOKEN_PATTERN.sub('REDACTED_TOKEN', output['text'])\n", " n_replaced += 1\n", "\n", "with open(NOTEBOOK, 'w') as f:\n", " json.dump(nb, f, indent=1)\n", "\n", "print(f\"Replaced {n_replaced} occurrence(s) of the token in {NOTEBOOK}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-BYmjugQkUDJ", "executionInfo": { "status": "ok", "timestamp": 1777488448968, "user_tz": 420, "elapsed": 24, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "f93ba54f-6f4a-4869-8f3f-20f0b32a25b2" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Replaced 2 occurrence(s) of the token in /content/deepfake-audio-detection/notebooks/01_data_acquisition.ipynb\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "os.chdir('/content/deepfake-audio-detection')\n", "\n", "# Roll back the failed commit (but keep the file changes)\n", "!git reset --soft HEAD~1\n", "\n", "# Verify\n", "!git status\n", "!git log --oneline -3" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xdhRFETlkVwu", "executionInfo": { "status": "ok", "timestamp": 1777488460139, "user_tz": 420, "elapsed": 397, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "9248dd0a-2262-4958-cceb-776c022523a9" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "On branch main\n", "Your branch is ahead of 'origin/main' by 1 commit.\n", " (use \"git push\" to publish your local commits)\n", "\n", "Changes to be committed:\n", " (use \"git restore --staged ...\" to unstage)\n", "\t\u001b[32mnew file: notebooks/01_data_acquisition.ipynb\u001b[m\n", "\n", "Changes not staged for commit:\n", " (use \"git add ...\" to update what will be committed)\n", " (use \"git restore ...\" to discard changes in working directory)\n", "\t\u001b[31mmodified: notebooks/01_data_acquisition.ipynb\u001b[m\n", "\n", "\u001b[33m1274341\u001b[m\u001b[33m (\u001b[m\u001b[1;36mHEAD -> \u001b[m\u001b[1;32mmain\u001b[m\u001b[33m)\u001b[m Phase 2: add preprocessing and dataset modules\n", "\u001b[33ma14fd25\u001b[m\u001b[33m (\u001b[m\u001b[1;31morigin/main\u001b[m\u001b[33m, \u001b[m\u001b[1;31morigin/HEAD\u001b[m\u001b[33m)\u001b[m Phase 2: add ASVspoof 2019 LA protocol parser\n", "\u001b[33md9f77a5\u001b[m Fix in config.yaml for wandb entity\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "os.chdir('/content/deepfake-audio-detection')\n", "\n", "!git add notebooks/01_data_acquisition.ipynb\n", "!git status" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oiTnCNXykfxu", "executionInfo": { "status": "ok", "timestamp": 1777488496689, "user_tz": 420, "elapsed": 294, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "25147d9e-a2d1-4506-c723-61cc08d8fcc4" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "On branch main\n", "Your branch is ahead of 'origin/main' by 1 commit.\n", " (use \"git push\" to publish your local commits)\n", "\n", "Changes to be committed:\n", " (use \"git restore --staged ...\" to unstage)\n", "\t\u001b[32mnew file: notebooks/01_data_acquisition.ipynb\u001b[m\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "import re\n", "TOKEN_PATTERN = re.compile(r'ghp_[A-Za-z0-9]{36}')\n", "\n", "with open('notebooks/01_data_acquisition.ipynb', 'r') as f:\n", " content = f.read()\n", "\n", "matches = TOKEN_PATTERN.findall(content)\n", "print(f\"Token instances remaining in notebook: {len(matches)}\")\n", "if matches:\n", " print(\"STILL CONTAINS TOKENS \u2014 do not push!\")\n", "else:\n", " print(\"Notebook is clean. Safe to push.\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KOhmRCvYkpJn", "executionInfo": { "status": "ok", "timestamp": 1777488534931, "user_tz": 420, "elapsed": 47, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "1a753ad5-3e4b-4cd1-f2fd-166fdf4dcb91" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Token instances remaining in notebook: 0\n", "Notebook is clean. Safe to push.\n" ] } ] }, { "cell_type": "code", "source": [ "from google.colab import userdata\n", "\n", "GITHUB_TOKEN = userdata.get('GITHUB_TOKEN')\n", "\n", "!git commit -m \"Phase 2: add data acquisition notebook\"\n", "\n", "push_url = f\"https://Saracasm:{GITHUB_TOKEN}@github.com/Saracasm/deepfake-audio-detection.git\"\n", "!git push {push_url} main 2>&1 | grep -v {GITHUB_TOKEN}" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MY6CQcTaks2z", "executionInfo": { "status": "ok", "timestamp": 1777488553942, "user_tz": 420, "elapsed": 1725, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "4a8a4c00-4dfe-4ad1-9d2a-d4d0f104deb7" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[main a3d84a6] Phase 2: add data acquisition notebook\n", " 1 file changed, 4486 insertions(+)\n", " create mode 100644 notebooks/01_data_acquisition.ipynb\n", "To https://github.com/Saracasm/deepfake-audio-detection.git\n", " 1274341..a3d84a6 main -> main\n" ] } ] }, { "cell_type": "code", "source": [ "import sys, importlib\n", "\n", "# Make sure repo is on Python path\n", "sys.path.insert(0, '/content/deepfake-audio-detection')\n", "\n", "# Reload modules in case anything was cached\n", "for mod in ['src.data.protocols', 'src.data.preprocessing', 'src.data.dataset']:\n", " if mod in sys.modules:\n", " importlib.reload(sys.modules[mod])\n", "\n", "from src.data.protocols import parse_all_partitions, class_counts\n", "from src.data.preprocessing import load_audio, segment_waveform, WINDOW_SAMPLES, HOP_SAMPLES\n", "from src.data.dataset import ASVspoofDataset, _count_windows\n", "\n", "# Re-parse protocols against the local dataset\n", "LA_ROOT = '/content/kaggle_download/LA'\n", "splits = parse_all_partitions(LA_ROOT)\n", "\n", "print(\"Modules imported, protocols parsed:\")\n", "for name, utts in splits.items():\n", " counts = class_counts(utts)\n", " print(f\" {name}: {len(utts):,} ({counts['bonafide']:,} bonafide, {counts['spoof']:,} spoof)\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "U-rDtkZ1r3OA", "executionInfo": { "status": "ok", "timestamp": 1777490433342, "user_tz": 420, "elapsed": 5460, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "7df08510-2b4d-4942-c3d4-dc1df3629820" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Modules imported, protocols parsed:\n", " train: 25,380 (2,580 bonafide, 22,800 spoof)\n", " dev: 24,844 (2,548 bonafide, 22,296 spoof)\n", " eval: 71,237 (7,355 bonafide, 63,882 spoof)\n" ] } ] }, { "cell_type": "code", "source": [ "import torchaudio\n", "from tqdm import tqdm\n", "\n", "print(\"Measuring durations of all training clips...\")\n", "\n", "# Try torchaudio.info first (fast, metadata only); fall back to load if needed\n", "try:\n", " sample = splits['train'][0]\n", " test = torchaudio.info(sample.flac_path)\n", " use_info = True\n", " print(\"Using fast metadata reads (torchaudio.info)\\n\")\n", "except (AttributeError, RuntimeError):\n", " use_info = False\n", " print(\"Falling back to torchaudio.load (slower but works)\\n\")\n", "\n", "train_durations = []\n", "for u in tqdm(splits['train'], desc=\"Train\"):\n", " if use_info:\n", " train_durations.append(torchaudio.info(u.flac_path).num_frames)\n", " else:\n", " waveform, _ = torchaudio.load(u.flac_path)\n", " train_durations.append(waveform.shape[1])\n", "\n", "print(f\"\\nMeasured {len(train_durations):,} clip durations.\")\n", "print(f\"Min: {min(train_durations):>10,} samples ({min(train_durations)/16000:.2f} sec)\")\n", "print(f\"Max: {max(train_durations):>10,} samples ({max(train_durations)/16000:.2f} sec)\")\n", "print(f\"Mean: {sum(train_durations)/len(train_durations):>10,.0f} samples ({sum(train_durations)/len(train_durations)/16000:.2f} sec)\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wuVFr9wFsqRg", "executionInfo": { "status": "ok", "timestamp": 1777490703883, "user_tz": 420, "elapsed": 66417, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "2e287716-2338-4baa-dea4-0c788e2a13b8" }, "execution_count": 15, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Measuring durations of all training clips...\n", "Falling back to torchaudio.load (slower but works)\n", "\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "Train: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 25380/25380 [01:06<00:00, 382.25it/s]" ] }, { "output_type": "stream", "name": "stdout", "text": [ "\n", "Measured 25,380 clip durations.\n", "Min: 10,439 samples (0.65 sec)\n", "Max: 211,007 samples (13.19 sec)\n", "Mean: 54,813 samples (3.43 sec)\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "\n" ] } ] }, { "cell_type": "code", "source": [ "# Build the training Dataset\n", "train_dataset = ASVspoofDataset(\n", " utterances=splits['train'],\n", " durations_samples=train_durations,\n", ")\n", "\n", "n_utterances = len(splits['train'])\n", "n_windows = len(train_dataset)\n", "inflation = n_windows / n_utterances\n", "\n", "print(f\"Training set:\")\n", "print(f\" Utterances: {n_utterances:,}\")\n", "print(f\" Total windows: {n_windows:,}\")\n", "print(f\" Inflation factor: {inflation:.2f}x\")\n", "print(f\" (i.e., on average {inflation:.2f} windows per utterance)\")\n", "\n", "# Spot-check a few entries from the index\n", "print(f\"\\nFirst 5 entries from the dataset index:\")\n", "for i in range(5):\n", " utt_idx, window_idx = train_dataset.index[i]\n", " utt = train_dataset.utterances[utt_idx]\n", " duration = train_durations[utt_idx]\n", " n_w_for_utt = sum(1 for j in train_dataset.index if j[0] == utt_idx)\n", " print(f\" idx {i}: utt={utt.utterance_id} ({duration} samples / {duration/16000:.2f} sec), \"\n", " f\"window {window_idx} of {n_w_for_utt}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OAVlnsFktGpR", "executionInfo": { "status": "ok", "timestamp": 1777490753142, "user_tz": 420, "elapsed": 46, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "560c05e0-cbc7-4c90-d043-a91015d4acf3" }, "execution_count": 16, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Training set:\n", " Utterances: 25,380\n", " Total windows: 34,036\n", " Inflation factor: 1.34x\n", " (i.e., on average 1.34 windows per utterance)\n", "\n", "First 5 entries from the dataset index:\n", " idx 0: utt=LA_T_1138215 (55329 samples / 3.46 sec), window 0 of 1\n", " idx 1: utt=LA_T_1271820 (70323 samples / 4.40 sec), window 0 of 2\n", " idx 2: utt=LA_T_1271820 (70323 samples / 4.40 sec), window 1 of 2\n", " idx 3: utt=LA_T_1272637 (46392 samples / 2.90 sec), window 0 of 1\n", " idx 4: utt=LA_T_1276960 (45001 samples / 2.81 sec), window 0 of 1\n" ] } ] }, { "cell_type": "markdown", "source": [ "Inflation factor 1.34x \u2014 meaning training will see 34,036 samples per epoch instead of 25,380. That extra 8,656 windows comes from longer clips contributing multiple windows. Within the range we expected (1.1-1.5).The index walks correctly:\n", "\n", "idx 0: a 3.46-sec clip \u2192 1 window (window 0 of 1)\n", "idx 1: a 4.40-sec clip \u2192 2 windows, this is window 0\n", "idx 2: same clip \u2192 window 1 of 2 (the overlapping window covering 2-6 sec)\n", "idx 3-4: short clips \u2192 1 window each" ], "metadata": { "id": "60GxxAb8tOwJ" } }, { "cell_type": "code", "source": [ "import torch\n", "from torch.utils.data import DataLoader\n", "\n", "# Build a DataLoader with a small batch and a few worker processes\n", "train_loader = DataLoader(\n", " train_dataset,\n", " batch_size=8,\n", " shuffle=True,\n", " num_workers=2, # parallel data loading\n", " pin_memory=True, # faster GPU transfer\n", ")\n", "\n", "print(f\"DataLoader built. Pulling one batch...\")\n", "\n", "# Pull exactly one batch\n", "batch_iter = iter(train_loader)\n", "waveforms, labels, utt_ids = next(batch_iter)\n", "\n", "print(f\"\\nBatch shapes:\")\n", "print(f\" waveforms: {waveforms.shape} (batch_size x samples)\")\n", "print(f\" labels: {labels.shape} (batch_size,)\")\n", "print(f\" utt_ids: list of {len(utt_ids)} strings\")\n", "\n", "print(f\"\\nBatch contents:\")\n", "print(f\" waveform dtype: {waveforms.dtype}\")\n", "print(f\" label values: {labels.tolist()}\")\n", "print(f\" utt_ids: {list(utt_ids)}\")\n", "\n", "print(f\"\\nWaveform statistics:\")\n", "print(f\" Min: {waveforms.min().item():.3f}\")\n", "print(f\" Max: {waveforms.max().item():.3f}\")\n", "print(f\" Mean: {waveforms.mean().item():.4f}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bV9MxrI3tPib", "executionInfo": { "status": "ok", "timestamp": 1777490799693, "user_tz": 420, "elapsed": 556, "user": { "displayName": "Sara Jaffrani", "userId": "07677779715251349607" } }, "outputId": "8191ed76-f433-4dcd-a8df-53e878dc8020" }, "execution_count": 17, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "DataLoader built. Pulling one batch...\n", "\n", "Batch shapes:\n", " waveforms: torch.Size([8, 64000]) (batch_size x samples)\n", " labels: torch.Size([8]) (batch_size,)\n", " utt_ids: list of 8 strings\n", "\n", "Batch contents:\n", " waveform dtype: torch.float32\n", " label values: [1, 1, 1, 1, 1, 0, 1, 1]\n", " utt_ids: ['LA_T_7084517', 'LA_T_3950907', 'LA_T_5280537', 'LA_T_4465139', 'LA_T_5033641', 'LA_T_3402710', 'LA_T_9651282', 'LA_T_1892752']\n", "\n", "Waveform statistics:\n", " Min: -1.000\n", " Max: 1.000\n", " Mean: -0.0000\n" ] } ] } ] }