#!/usr/bin/env python3 """ Batch-Enhanced FastAPI YouTube Video Downloader Supports both single and batch downloads with cookie support """ import os import sys import subprocess import json import random import time import asyncio import logging import uuid from pathlib import Path from typing import Optional, Dict, Any, List from datetime import datetime from concurrent.futures import ThreadPoolExecutor from fastapi import FastAPI, HTTPException, BackgroundTasks, UploadFile, File from fastapi.responses import FileResponse, HTMLResponse, JSONResponse from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel, HttpUrl import uvicorn # Set up logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) # Pydantic models class VideoInfoRequest(BaseModel): url: HttpUrl use_cookies: Optional[bool] = None class BatchVideoInfoRequest(BaseModel): urls: List[HttpUrl] use_cookies: Optional[bool] = None class DownloadRequest(BaseModel): url: HttpUrl quality: str = "best" audio_only: bool = False use_cookies: Optional[bool] = None class BatchDownloadRequest(BaseModel): urls: List[HttpUrl] quality: str = "best" audio_only: bool = False use_cookies: Optional[bool] = None max_concurrent: int = 2 class VideoInfo(BaseModel): title: str duration: int uploader: str view_count: int upload_date: str description: str formats: int id: str thumbnail: str webpage_url: str class BatchVideoInfo(BaseModel): url: str success: bool info: Optional[VideoInfo] = None error: Optional[str] = None class DownloadResponse(BaseModel): success: bool message: str filename: Optional[str] = None file_size: Optional[int] = None video_info: Optional[VideoInfo] = None download_path: Optional[str] = None class BatchDownloadResponse(BaseModel): batch_id: str total_urls: int status: str completed: int failed: int results: List[Dict[str, Any]] class BatchStatus(BaseModel): batch_id: str status: str total_urls: int completed: int failed: int in_progress: int results: List[Dict[str, Any]] created_at: str updated_at: str class HealthResponse(BaseModel): status: str yt_dlp_available: bool timestamp: str cookie_file_exists: bool strategies_enabled: List[str] batch_support: bool class BatchFileListResponse(BaseModel): batch_id: str total_files: int files: List[Dict[str, str]] # Initialize FastAPI app app = FastAPI( title="Batch YouTube Video Downloader", description="Download YouTube videos individually or in batches with cookie support", version="4.0.2", docs_url="/docs", redoc_url="/redoc" ) # Add CORS middleware app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Thread pool for background tasks executor = ThreadPoolExecutor(max_workers=4) # Global batch status storage (in production, use Redis or database) batch_status_store = {} class CookieManager: """Manages cookie files and validation""" def __init__(self, cookie_dir: str = None): if cookie_dir is None: if os.path.exists('/data'): cookie_dir = '/data/cookies' else: cookie_dir = '/tmp/cookies' self.cookie_dir = Path(cookie_dir) self.cookie_dir.mkdir(parents=True, exist_ok=True) self.cookie_file = self.cookie_dir / "youtube_cookies.txt" def save_cookies(self, cookie_content: str) -> bool: """Save cookie content to file with validation""" try: lines = cookie_content.strip().split('\n') data_lines = [line for line in lines if line.strip() and not line.startswith('#')] if not data_lines: logger.error("Cookie file appears to be empty or contains only comments") return False has_youtube = any('youtube.com' in line for line in data_lines) if not has_youtube: logger.warning("Cookie file doesn't contain youtube.com entries") with open(self.cookie_file, 'w', encoding='utf-8') as f: f.write(cookie_content) logger.info(f"Cookies saved to {self.cookie_file}") return True except Exception as e: logger.error(f"Failed to save cookies: {e}") return False def validate_cookies(self) -> Dict[str, Any]: """Validate existing cookie file""" if not self.cookie_file.exists(): return {"valid": False, "reason": "Cookie file does not exist"} try: with open(self.cookie_file, 'r', encoding='utf-8') as f: content = f.read() lines = content.strip().split('\n') data_lines = [line for line in lines if line.strip() and not line.startswith('#')] if not data_lines: return {"valid": False, "reason": "Cookie file is empty or contains only comments"} youtube_cookies = [line for line in data_lines if 'youtube.com' in line] essential_cookies = ['VISITOR_INFO1_LIVE', 'YSC', 'CONSENT'] found_essential = [] for line in data_lines: for cookie in essential_cookies: if cookie in line: found_essential.append(cookie) return { "valid": True, "total_lines": len(data_lines), "youtube_cookies": len(youtube_cookies), "essential_cookies": found_essential, "file_size": self.cookie_file.stat().st_size, "last_modified": datetime.fromtimestamp(self.cookie_file.stat().st_mtime).isoformat() } except Exception as e: return {"valid": False, "reason": f"Error reading cookie file: {e}"} def get_cookie_path(self) -> Optional[str]: """Get path to cookie file if it exists and is valid""" validation = self.validate_cookies() if validation["valid"]: return str(self.cookie_file) return None class BatchYouTubeDownloader: """Enhanced YouTube downloader with batch support and cookie handling""" def __init__(self, download_dir: str = None): if download_dir is None: if os.path.exists('/data'): download_dir = '/data/downloads' else: download_dir = '/tmp/downloads' self.download_dir = Path(download_dir) self.download_dir.mkdir(parents=True, exist_ok=True) self.cookie_manager = CookieManager() self.user_agents = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', ] self._ensure_ytdlp_available() def _ensure_ytdlp_available(self): """Ensure yt-dlp is available, install if necessary""" try: subprocess.run(['yt-dlp', '--version'], capture_output=True, check=True) logger.info("yt-dlp is available") except (subprocess.CalledProcessError, FileNotFoundError): logger.info("Installing yt-dlp...") try: subprocess.run([sys.executable, '-m', 'pip', 'install', 'yt-dlp'], check=True, capture_output=True) logger.info("yt-dlp installed successfully") except subprocess.CalledProcessError as e: logger.error(f"Failed to install yt-dlp: {e}") raise RuntimeError("Could not install yt-dlp") def _build_command(self, base_cmd: List[str], use_cookies: bool = False) -> List[str]: """Build yt-dlp command with proper options""" cmd = base_cmd.copy() cmd.extend(['--user-agent', random.choice(self.user_agents)]) if use_cookies: cookie_path = self.cookie_manager.get_cookie_path() if cookie_path: cmd.extend(['--cookies', cookie_path]) logger.info("Using cookie file for authentication") else: logger.warning("Cookies requested but no valid cookie file found") cmd.extend(['--no-cookies']) else: cmd.extend(['--no-cookies']) cmd.extend([ '--sleep-interval', str(random.randint(1, 3)), '--retries', '5', '--fragment-retries', '5', '--socket-timeout', '30', '--no-check-certificates', '--geo-bypass', '--add-header', 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', '--add-header', 'Accept-Language:en-US,en;q=0.5', '--add-header', 'Accept-Encoding:gzip, deflate', '--add-header', 'DNT:1', '--add-header', 'Connection:keep-alive', '--add-header', 'Upgrade-Insecure-Requests:1', ]) return cmd def get_video_info(self, url: str, use_cookies: Optional[bool] = None, retry_count: int = 0) -> Optional[Dict[str, Any]]: """Get video information with cookie support""" max_retries = 3 # Determine actual use_cookies value actual_use_cookies = use_cookies if use_cookies is not None else self.cookie_manager.get_cookie_path() is not None try: base_cmd = ["yt-dlp", "--dump-json", "--no-download", str(url)] cmd = self._build_command(base_cmd, actual_use_cookies) logger.info(f"Getting video info (attempt {retry_count + 1}, cookies: {actual_use_cookies})") result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=60) video_info = json.loads(result.stdout) return { 'title': video_info.get('title', 'Unknown'), 'duration': video_info.get('duration', 0), 'uploader': video_info.get('uploader', 'Unknown'), 'view_count': video_info.get('view_count', 0), 'upload_date': video_info.get('upload_date', 'Unknown'), 'description': video_info.get('description', ''), 'formats': len(video_info.get('formats', [])), 'id': video_info.get('id', 'Unknown'), 'thumbnail': video_info.get('thumbnail', ''), 'webpage_url': video_info.get('webpage_url', str(url)) } except subprocess.CalledProcessError as e: error_msg = e.stderr.lower() if e.stderr else "" if "429" in error_msg or "too many requests" in error_msg: if retry_count < max_retries: wait_time = (retry_count + 1) * 30 logger.warning(f"Rate limited, waiting {wait_time}s before retry {retry_count + 1}") time.sleep(wait_time) return self.get_video_info(url, use_cookies, retry_count + 1) elif "sign in" in error_msg or "bot" in error_msg: if not use_cookies and retry_count == 0: logger.warning("Bot detection triggered, retrying with cookies if available") return self.get_video_info(url, True, retry_count + 1) elif retry_count < max_retries: wait_time = (retry_count + 1) * 60 logger.warning(f"Bot detection, waiting {wait_time}s before retry {retry_count + 1}") time.sleep(wait_time) return self.get_video_info(url, use_cookies, retry_count + 1) logger.error(f"Failed to get video info: {e.stderr}") return None except (json.JSONDecodeError, subprocess.TimeoutExpired) as e: logger.error(f"Error processing video info: {e}") if retry_count < max_retries: time.sleep(10) return self.get_video_info(url, use_cookies, retry_count + 1) return None def download_video(self, url: str, quality: str = "best", audio_only: bool = False, use_cookies: Optional[bool] = None, retry_count: int = 0) -> Optional[str]: """Download video with cookie support""" max_retries = 2 # Determine actual use_cookies value actual_use_cookies = use_cookies if use_cookies is not None else self.cookie_manager.get_cookie_path() is not None try: base_cmd = ["yt-dlp"] output_template = str(self.download_dir / "%(title)s_%(id)s.%(ext)s") base_cmd.extend(["-o", output_template]) if audio_only: base_cmd.extend(["-f", "bestaudio/best"]) else: if quality == "best": base_cmd.extend(["-f", "bestvideo[height<=720]+bestaudio/best[height<=720]"]) elif quality == "720p": base_cmd.extend(["-f", "bestvideo[height=720]+bestaudio/best[height=720]"]) elif quality == "worst": base_cmd.extend(["-f", "worst"]) else: base_cmd.extend(["-f", quality]) base_cmd.append(str(url)) cmd = self._build_command(base_cmd, actual_use_cookies) logger.info(f"Downloading video (attempt {retry_count + 1}, cookies: {actual_use_cookies})") result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=300) logger.info("Download completed successfully") downloaded_files = [f for f in self.download_dir.glob("*") if f.is_file()] if downloaded_files: latest_file = max(downloaded_files, key=os.path.getctime) return str(latest_file) return None except subprocess.CalledProcessError as e: error_msg = e.stderr.lower() if e.stderr else "" if ("429" in error_msg or "too many requests" in error_msg or "sign in" in error_msg or "bot" in error_msg): if not use_cookies and retry_count == 0: logger.warning("Download blocked, retrying with cookies if available") return self.download_video(url, quality, audio_only, True, retry_count + 1) elif retry_count < max_retries: wait_time = (retry_count + 1) * 60 logger.warning(f"Download blocked, waiting {wait_time}s before retry {retry_count + 1}") time.sleep(wait_time) return self.download_video(url, quality, audio_only, use_cookies, retry_count + 1) logger.error(f"Download failed: {e.stderr}") return None except subprocess.TimeoutExpired: logger.error("Download timeout") if retry_count < max_retries: return self.download_video(url, quality, audio_only, use_cookies, retry_count + 1) return None async def batch_get_info(self, urls: List[str], use_cookies: bool = False) -> List[BatchVideoInfo]: """Get info for multiple videos concurrently""" results = [] async def get_single_info(url: str) -> BatchVideoInfo: try: loop = asyncio.get_event_loop() info = await loop.run_in_executor(executor, self.get_video_info, url, use_cookies) if info: return BatchVideoInfo( url=url, success=True, info=VideoInfo(**info) ) else: return BatchVideoInfo( url=url, success=False, error="Failed to get video information" ) except Exception as e: return BatchVideoInfo( url=url, success=False, error=str(e) ) # Process URLs concurrently with a limit semaphore = asyncio.Semaphore(3) # Limit concurrent requests async def limited_get_info(url: str) -> BatchVideoInfo: async with semaphore: return await get_single_info(url) tasks = [limited_get_info(url) for url in urls] results = await asyncio.gather(*tasks) return results async def batch_download(self, batch_id: str, urls: List[str], quality: str = "best", audio_only: bool = False, use_cookies: bool = False, max_concurrent: int = 2) -> None: """Download multiple videos with progress tracking""" # Initialize batch status batch_status_store[batch_id] = { "batch_id": batch_id, "status": "in_progress", "total_urls": len(urls), "completed": 0, "failed": 0, "in_progress": 0, "results": [], "created_at": datetime.now().isoformat(), "updated_at": datetime.now().isoformat() } semaphore = asyncio.Semaphore(max_concurrent) async def download_single(url: str) -> Dict[str, Any]: async with semaphore: batch_status_store[batch_id]["in_progress"] += 1 batch_status_store[batch_id]["updated_at"] = datetime.now().isoformat() try: loop = asyncio.get_event_loop() # Get video info first info = await loop.run_in_executor( executor, self.get_video_info, url, use_cookies ) if not info: result = { "url": url, "success": False, "error": "Failed to get video information", "completed_at": datetime.now().isoformat() } batch_status_store[batch_id]["failed"] += 1 batch_status_store[batch_id]["in_progress"] -= 1 batch_status_store[batch_id]["results"].append(result) batch_status_store[batch_id]["updated_at"] = datetime.now().isoformat() return result # Download the video downloaded_file = await loop.run_in_executor( executor, self.download_video, url, quality, audio_only, use_cookies ) if downloaded_file: file_size = os.path.getsize(downloaded_file) filename = os.path.basename(downloaded_file) result = { "url": url, "success": True, "filename": filename, "file_size": file_size, "video_info": info, "download_path": downloaded_file, "completed_at": datetime.now().isoformat() } batch_status_store[batch_id]["completed"] += 1 else: result = { "url": url, "success": False, "error": "Failed to download video", "completed_at": datetime.now().isoformat() } batch_status_store[batch_id]["failed"] += 1 batch_status_store[batch_id]["in_progress"] -= 1 batch_status_store[batch_id]["results"].append(result) batch_status_store[batch_id]["updated_at"] = datetime.now().isoformat() return result except Exception as e: result = { "url": url, "success": False, "error": str(e), "completed_at": datetime.now().isoformat() } batch_status_store[batch_id]["failed"] += 1 batch_status_store[batch_id]["in_progress"] -= 1 batch_status_store[batch_id]["results"].append(result) batch_status_store[batch_id]["updated_at"] = datetime.now().isoformat() return result # Process all downloads tasks = [download_single(url) for url in urls] await asyncio.gather(*tasks) # Mark batch as completed batch_status_store[batch_id]["status"] = "completed" batch_status_store[batch_id]["updated_at"] = datetime.now().isoformat() # Global downloader instance downloader = BatchYouTubeDownloader() @app.get("/", response_class=HTMLResponse) async def read_root(): """Serve the main HTML interface with batch support and cookie upload""" html_content = """
Download single videos or process multiple URLs in batches with cookie support
POST /upload-cookies - Upload cookie fileGET /cookie-status - Check cookie statusPOST /video/info - Get single video informationPOST /video/download - Download single videoGET /video/file/{filename} - Download a specific filePOST /batch/info - Get info for multiple videosPOST /batch/download - Start batch downloadGET /batch/status/{batch_id} - Check batch progressGET /batch/files/{batch_id} - Get list of downloadable files