Update app.py
Browse files
app.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
-
YouTube Comment Sentiment Analyzer -
|
| 4 |
-
|
| 5 |
"""
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
import pandas as pd
|
| 9 |
import numpy as np
|
| 10 |
import matplotlib
|
| 11 |
-
matplotlib.use('Agg')
|
| 12 |
import matplotlib.pyplot as plt
|
| 13 |
import seaborn as sns
|
| 14 |
import re
|
|
@@ -22,61 +22,94 @@ from googleapiclient.errors import HttpError
|
|
| 22 |
import warnings
|
| 23 |
warnings.filterwarnings('ignore')
|
| 24 |
|
| 25 |
-
#
|
|
|
|
|
|
|
|
|
|
| 26 |
try:
|
| 27 |
import emoji
|
| 28 |
EMOJI_AVAILABLE = True
|
| 29 |
except ImportError:
|
| 30 |
EMOJI_AVAILABLE = False
|
| 31 |
|
| 32 |
-
# Set seed for consistent language detection
|
| 33 |
DetectorFactory.seed = 0
|
| 34 |
-
|
| 35 |
-
# Set matplotlib style
|
| 36 |
plt.style.use('seaborn-v0_8-darkgrid')
|
| 37 |
sns.set_palette("husl")
|
| 38 |
|
| 39 |
-
# Get YouTube API key from Hugging Face Secrets
|
| 40 |
YOUTUBE_API_KEY = os.environ.get("GoogleAPIKey")
|
| 41 |
|
| 42 |
-
#
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
class YouTubeSentimentAnalyzer:
|
| 52 |
-
"""Main analyzer class for YouTube comments"""
|
| 53 |
-
|
| 54 |
def __init__(self):
|
| 55 |
-
"""Initialize YouTube API client"""
|
| 56 |
self.youtube = None
|
| 57 |
if YOUTUBE_API_KEY:
|
| 58 |
try:
|
| 59 |
self.youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
|
| 60 |
-
print("β
YouTube API initialized
|
| 61 |
except Exception as e:
|
| 62 |
-
print(f"β
|
| 63 |
|
| 64 |
def extract_comments(self, video_url, max_comments=150):
|
| 65 |
-
"""
|
| 66 |
-
Extract comments from YouTube video using official API
|
| 67 |
-
|
| 68 |
-
Args:
|
| 69 |
-
video_url: YouTube video URL
|
| 70 |
-
max_comments: Maximum number of comments to extract
|
| 71 |
-
|
| 72 |
-
Returns:
|
| 73 |
-
tuple: (comments_list, error_message)
|
| 74 |
-
"""
|
| 75 |
if not self.youtube:
|
| 76 |
-
return [], "YouTube API not configured.
|
| 77 |
|
| 78 |
try:
|
| 79 |
-
# Extract video ID from URL
|
| 80 |
if 'v=' in video_url:
|
| 81 |
video_id = video_url.split('v=')[-1].split('&')[0]
|
| 82 |
elif 'youtu.be/' in video_url:
|
|
@@ -84,8 +117,6 @@ class YouTubeSentimentAnalyzer:
|
|
| 84 |
else:
|
| 85 |
video_id = video_url
|
| 86 |
|
| 87 |
-
print(f"Fetching comments for video ID: {video_id}")
|
| 88 |
-
|
| 89 |
comments = []
|
| 90 |
next_page_token = None
|
| 91 |
|
|
@@ -105,498 +136,260 @@ class YouTubeSentimentAnalyzer:
|
|
| 105 |
'author': comment_data.get('authorDisplayName', 'Anonymous'),
|
| 106 |
'text': comment_data.get('textDisplay', ''),
|
| 107 |
'likes': comment_data.get('likeCount', 0),
|
| 108 |
-
'time': comment_data.get('publishedAt', '')
|
| 109 |
-
'replies': item['snippet'].get('totalReplyCount', 0)
|
| 110 |
})
|
| 111 |
|
| 112 |
next_page_token = response.get('nextPageToken')
|
| 113 |
if not next_page_token:
|
| 114 |
break
|
| 115 |
|
| 116 |
-
print(f"β
Successfully extracted {len(comments)} comments")
|
| 117 |
return comments, None
|
| 118 |
-
|
| 119 |
except HttpError as e:
|
| 120 |
if e.resp.status == 403:
|
| 121 |
-
return [], "
|
| 122 |
-
|
| 123 |
-
return [], "Video not found or comments are disabled."
|
| 124 |
-
else:
|
| 125 |
-
return [], f"YouTube API Error: {str(e)}"
|
| 126 |
except Exception as e:
|
| 127 |
-
return [],
|
| 128 |
|
| 129 |
def clean_text(self, text):
|
| 130 |
-
"""
|
| 131 |
-
Clean and preprocess text for analysis
|
| 132 |
-
|
| 133 |
-
Args:
|
| 134 |
-
text: Raw comment text
|
| 135 |
-
|
| 136 |
-
Returns:
|
| 137 |
-
Cleaned text
|
| 138 |
-
"""
|
| 139 |
if not text or not isinstance(text, str):
|
| 140 |
return ""
|
| 141 |
-
|
| 142 |
-
# Remove URLs
|
| 143 |
text = re.sub(r'http\S+|www\S+|https\S+', '', text)
|
| 144 |
-
# Remove HTML tags
|
| 145 |
text = re.sub(r'<.*?>', '', text)
|
| 146 |
-
# Remove special characters but keep basic punctuation
|
| 147 |
-
text = re.sub(r'[^\w\s\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF\.\,\!\?\']', ' ', text)
|
| 148 |
-
# Remove extra whitespace
|
| 149 |
text = re.sub(r'\s+', ' ', text).strip()
|
| 150 |
-
|
| 151 |
return text
|
| 152 |
|
| 153 |
def detect_language(self, text):
|
| 154 |
-
"""
|
| 155 |
-
Detect if text is English or Urdu/Roman Urdu
|
| 156 |
-
|
| 157 |
-
Args:
|
| 158 |
-
text: Cleaned comment text
|
| 159 |
-
|
| 160 |
-
Returns:
|
| 161 |
-
Language code: 'english', 'urdu', 'other', or 'unknown'
|
| 162 |
-
"""
|
| 163 |
try:
|
| 164 |
if not text or len(text) < 3:
|
| 165 |
return 'unknown'
|
| 166 |
-
|
| 167 |
-
# Check for Urdu characters (Unicode range for Arabic/Persian/Urdu)
|
| 168 |
-
if re.search(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF]', text):
|
| 169 |
return 'urdu'
|
| 170 |
-
|
| 171 |
-
|
| 172 |
lang = detect(text)
|
| 173 |
return 'english' if lang == 'en' else 'other'
|
| 174 |
-
|
| 175 |
-
except Exception:
|
| 176 |
return 'unknown'
|
| 177 |
|
| 178 |
-
def
|
| 179 |
-
"""
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
emojis_found.append(char)
|
| 195 |
-
return emojis_found
|
| 196 |
|
| 197 |
-
def
|
| 198 |
-
"""
|
| 199 |
-
Analyze sentiment using TextBlob
|
| 200 |
-
|
| 201 |
-
Args:
|
| 202 |
-
text: Cleaned comment text
|
| 203 |
-
|
| 204 |
-
Returns:
|
| 205 |
-
tuple: (sentiment_label, polarity_score)
|
| 206 |
-
"""
|
| 207 |
try:
|
| 208 |
blob = TextBlob(text)
|
| 209 |
polarity = blob.sentiment.polarity
|
| 210 |
-
|
| 211 |
if polarity > 0.1:
|
| 212 |
return 'Positive', polarity
|
| 213 |
elif polarity < -0.1:
|
| 214 |
return 'Negative', polarity
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
except Exception:
|
| 218 |
return 'Neutral', 0.0
|
| 219 |
|
| 220 |
def process_comments(self, comments):
|
| 221 |
-
"""
|
| 222 |
-
Process and analyze all comments
|
| 223 |
-
|
| 224 |
-
Args:
|
| 225 |
-
comments: List of comment dictionaries
|
| 226 |
-
|
| 227 |
-
Returns:
|
| 228 |
-
DataFrame with analysis results
|
| 229 |
-
"""
|
| 230 |
if not comments:
|
| 231 |
return pd.DataFrame()
|
| 232 |
|
| 233 |
-
# Create DataFrame
|
| 234 |
df = pd.DataFrame(comments)
|
| 235 |
-
|
| 236 |
-
# Clean text
|
| 237 |
df['clean_text'] = df['text'].apply(self.clean_text)
|
| 238 |
df = df[df['clean_text'].str.len() > 2]
|
| 239 |
|
| 240 |
if len(df) == 0:
|
| 241 |
return df
|
| 242 |
|
| 243 |
-
# Detect language
|
| 244 |
df['language'] = df['clean_text'].apply(self.detect_language)
|
| 245 |
|
| 246 |
-
# Analyze sentiment
|
| 247 |
sentiments = []
|
| 248 |
-
|
| 249 |
-
for
|
| 250 |
-
|
|
|
|
|
|
|
|
|
|
| 251 |
sentiments.append(sent)
|
| 252 |
-
|
| 253 |
|
| 254 |
df['sentiment'] = sentiments
|
| 255 |
-
df['polarity'] =
|
| 256 |
|
| 257 |
-
# Extract emojis
|
| 258 |
if EMOJI_AVAILABLE:
|
| 259 |
-
df['emojis'] = df['text'].apply(
|
| 260 |
df['emoji_count'] = df['emojis'].apply(len)
|
| 261 |
df['has_emoji'] = df['emoji_count'] > 0
|
| 262 |
|
| 263 |
return df
|
| 264 |
|
| 265 |
-
|
| 266 |
def create_visualizations(df):
|
| 267 |
-
"""
|
| 268 |
-
Create all visualization plots
|
| 269 |
-
|
| 270 |
-
Args:
|
| 271 |
-
df: DataFrame with analysis results
|
| 272 |
-
|
| 273 |
-
Returns:
|
| 274 |
-
tuple: (pie_chart, language_chart, bar_chart, top_table, wordcloud_plot)
|
| 275 |
-
"""
|
| 276 |
if len(df) == 0:
|
| 277 |
return None, None, None, None, None
|
| 278 |
|
| 279 |
-
#
|
| 280 |
fig1, ax1 = plt.subplots(figsize=(10, 8))
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
ax1.
|
| 286 |
-
colors=plot_colors, startangle=90, explode=[0.05] * len(sentiment_counts))
|
| 287 |
-
ax1.set_title('Sentiment Distribution', fontsize=16, fontweight='bold', pad=20)
|
| 288 |
plt.tight_layout()
|
| 289 |
-
|
| 290 |
|
| 291 |
-
#
|
| 292 |
fig2, ax2 = plt.subplots(figsize=(10, 8))
|
| 293 |
lang_counts = df['language'].value_counts()
|
| 294 |
-
lang_labels = {'english': 'English', 'urdu': 'Urdu/Roman Urdu', 'other': 'Other'
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
ax2.pie(lang_counts.values, labels=lang_labels_display, autopct='%1.1f%%',
|
| 300 |
-
colors=plot_colors_lang, startangle=90)
|
| 301 |
-
ax2.set_title('Language Distribution', fontsize=16, fontweight='bold', pad=20)
|
| 302 |
plt.tight_layout()
|
| 303 |
-
|
| 304 |
|
| 305 |
-
#
|
| 306 |
fig3, ax3 = plt.subplots(figsize=(10, 6))
|
| 307 |
-
bars = ax3.bar(
|
| 308 |
-
color=[colors_sent.get(x, '#95a5a6') for x in sentiment_counts.index],
|
| 309 |
-
edgecolor='black', linewidth=1.5)
|
| 310 |
-
|
| 311 |
for bar in bars:
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
ax3.set_xlabel('Sentiment', fontsize=14)
|
| 317 |
-
ax3.set_ylabel('Number of Comments', fontsize=14)
|
| 318 |
-
ax3.set_title('Sentiment Distribution (Bar Chart)', fontsize=14, fontweight='bold')
|
| 319 |
ax3.grid(axis='y', alpha=0.3)
|
| 320 |
plt.tight_layout()
|
| 321 |
-
|
| 322 |
|
| 323 |
-
#
|
| 324 |
fig4, ax4 = plt.subplots(figsize=(14, 8))
|
| 325 |
ax4.axis('tight')
|
| 326 |
ax4.axis('off')
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
)
|
| 332 |
-
|
| 333 |
-
table = ax4.table(cellText=top_comments.values,
|
| 334 |
-
colLabels=['Author', 'Comment', 'Likes', 'Sentiment'],
|
| 335 |
-
cellLoc='left', loc='center',
|
| 336 |
-
colWidths=[0.15, 0.55, 0.1, 0.1])
|
| 337 |
-
|
| 338 |
table.auto_set_font_size(False)
|
| 339 |
table.set_fontsize(9)
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
# Color code sentiment column
|
| 343 |
-
for i, sentiment in enumerate(top_comments['sentiment'].values, start=1):
|
| 344 |
-
if sentiment == 'Positive':
|
| 345 |
table[(i, 3)].set_facecolor('#90EE90')
|
| 346 |
-
elif
|
| 347 |
table[(i, 3)].set_facecolor('#FFB6C1')
|
| 348 |
-
|
| 349 |
-
table[(i, 3)].set_facecolor('#F0E68C')
|
| 350 |
-
|
| 351 |
-
ax4.set_title('Top 10 Most Engaging Comments', fontsize=16, fontweight='bold', pad=20)
|
| 352 |
plt.tight_layout()
|
| 353 |
top_table = fig4
|
| 354 |
|
| 355 |
-
#
|
| 356 |
fig5, ax5 = plt.subplots(figsize=(12, 6))
|
| 357 |
all_text = ' '.join(df['clean_text'].tolist())
|
| 358 |
-
|
| 359 |
if all_text.strip():
|
| 360 |
try:
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
contour_width=1, contour_color='steelblue').generate(all_text)
|
| 364 |
-
ax5.imshow(wordcloud, interpolation='bilinear')
|
| 365 |
ax5.axis('off')
|
| 366 |
-
ax5.set_title('Word Cloud
|
| 367 |
-
except
|
| 368 |
-
ax5.text(0.5, 0.5,
|
| 369 |
-
ha='center', va='center', transform=ax5.transAxes)
|
| 370 |
-
else:
|
| 371 |
-
ax5.text(0.5, 0.5, 'No text available for word cloud',
|
| 372 |
-
ha='center', va='center', transform=ax5.transAxes)
|
| 373 |
-
|
| 374 |
plt.tight_layout()
|
| 375 |
-
|
| 376 |
|
| 377 |
-
return
|
| 378 |
-
|
| 379 |
|
| 380 |
def analyze_youtube_video(video_url, progress=gr.Progress()):
|
| 381 |
-
"""
|
| 382 |
-
Main analysis function for Gradio interface
|
| 383 |
-
|
| 384 |
-
Args:
|
| 385 |
-
video_url: YouTube video URL
|
| 386 |
-
|
| 387 |
-
Returns:
|
| 388 |
-
tuple: (statistics_text, pie_chart, language_chart, bar_chart, top_table, wordcloud_plot)
|
| 389 |
-
"""
|
| 390 |
-
# Validate input
|
| 391 |
if not video_url or not video_url.strip():
|
| 392 |
-
return "β
|
| 393 |
|
| 394 |
-
# Check API key
|
| 395 |
if not YOUTUBE_API_KEY:
|
| 396 |
-
return "
|
| 397 |
-
|
| 398 |
-
Please add your YouTube API key as a repository secret:
|
| 399 |
-
1. Go to the **Settings** tab of this Space
|
| 400 |
-
2. Scroll to **Repository Secrets**
|
| 401 |
-
3. Click **New secret**
|
| 402 |
-
4. Name: `GoogleAPIKey`
|
| 403 |
-
5. Value: Your YouTube API key from Google Cloud Console
|
| 404 |
-
6. Click **Add secret**
|
| 405 |
-
|
| 406 |
-
Then refresh this page and try again.""", [None] * 5
|
| 407 |
|
| 408 |
try:
|
| 409 |
-
|
| 410 |
-
progress(0.1, desc="Initializing YouTube API...")
|
| 411 |
analyzer = YouTubeSentimentAnalyzer()
|
| 412 |
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
# Step 2: Extract comments
|
| 417 |
-
progress(0.2, desc="Extracting comments from YouTube...")
|
| 418 |
-
comments, error = analyzer.extract_comments(video_url, max_comments=50)
|
| 419 |
|
| 420 |
if error:
|
| 421 |
-
return f"β {error}",
|
| 422 |
|
| 423 |
if not comments:
|
| 424 |
-
return "β No comments found
|
| 425 |
|
| 426 |
-
|
| 427 |
-
progress(0.5, desc=f"Processing {len(comments)} comments...")
|
| 428 |
df = analyzer.process_comments(comments)
|
| 429 |
|
| 430 |
if len(df) == 0:
|
| 431 |
-
return "β No valid comments
|
| 432 |
|
| 433 |
-
# Step 4: Generate statistics
|
| 434 |
progress(0.7, desc="Generating statistics...")
|
| 435 |
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
positive_count = len(df[df['sentiment'] == 'Positive'])
|
| 442 |
-
negative_count = len(df[df['sentiment'] == 'Negative'])
|
| 443 |
-
neutral_count = len(df[df['sentiment'] == 'Neutral'])
|
| 444 |
-
|
| 445 |
-
english_count = len(df[df['language'] == 'english'])
|
| 446 |
urdu_count = len(df[df['language'] == 'urdu'])
|
|
|
|
| 447 |
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
if EMOJI_AVAILABLE and 'has_emoji' in df.columns:
|
| 451 |
-
emoji_comments = df['has_emoji'].sum()
|
| 452 |
-
total_emojis = df['emoji_count'].sum()
|
| 453 |
-
unique_emojis = df['emojis'].sum()
|
| 454 |
-
|
| 455 |
-
if emoji_comments > 0:
|
| 456 |
-
emoji_section = f"""
|
| 457 |
-
### π Emoji Analysis
|
| 458 |
-
- **Comments with emojis:** {emoji_comments} ({emoji_comments/total_comments*100:.1f}%)
|
| 459 |
-
- **Total emojis used:** {total_emojis}
|
| 460 |
-
- **Average emojis per comment:** {df['emoji_count'].mean():.2f}
|
| 461 |
-
"""
|
| 462 |
-
|
| 463 |
-
# Top commenters
|
| 464 |
-
top_authors = df['author'].value_counts().head(5)
|
| 465 |
-
top_authors_text = ""
|
| 466 |
-
for author, count in top_authors.items():
|
| 467 |
-
if author != 'Anonymous':
|
| 468 |
-
top_authors_text += f"- **{author}:** {count} comments\n"
|
| 469 |
-
|
| 470 |
-
if not top_authors_text:
|
| 471 |
-
top_authors_text = "- No active commenters found\n"
|
| 472 |
-
|
| 473 |
-
# Build statistics text
|
| 474 |
-
stats_text = f"""
|
| 475 |
-
## π Analysis Results
|
| 476 |
-
|
| 477 |
-
### Basic Statistics
|
| 478 |
-
- **Total Comments Analyzed:** {total_comments:,}
|
| 479 |
-
- **Total Likes Received:** {total_likes:,}
|
| 480 |
-
- **Average Likes per Comment:** {avg_likes:.2f}
|
| 481 |
-
- **Median Likes per Comment:** {median_likes:.0f}
|
| 482 |
|
| 483 |
-
###
|
| 484 |
-
- **
|
| 485 |
-
- **
|
| 486 |
-
- **
|
|
|
|
|
|
|
|
|
|
| 487 |
|
| 488 |
-
|
| 489 |
-
- **English Comments:** {english_count} ({english_count/total_comments*100:.1f}%)
|
| 490 |
-
- **Urdu/Roman Urdu Comments:** {urdu_count} ({urdu_count/total_comments*100:.1f}%)
|
| 491 |
-
|
| 492 |
-
{emoji_section}
|
| 493 |
-
### π₯ Most Active Commenters
|
| 494 |
-
{top_authors_text}
|
| 495 |
-
---
|
| 496 |
-
*Analysis completed using YouTube Data API v3*
|
| 497 |
"""
|
| 498 |
|
| 499 |
-
# Step 5: Create visualizations
|
| 500 |
progress(0.9, desc="Creating visualizations...")
|
| 501 |
-
|
| 502 |
|
| 503 |
progress(1.0, desc="Complete!")
|
| 504 |
-
|
| 505 |
-
return stats_text, pie_chart, lang_chart, bar_chart, top_table, wordcloud_plot
|
| 506 |
|
| 507 |
except Exception as e:
|
| 508 |
-
|
| 509 |
-
error_details = traceback.format_exc()
|
| 510 |
-
print(error_details)
|
| 511 |
-
return f"β Unexpected error: {str(e)}\n\nPlease check the video URL and try again.", [None] * 5
|
| 512 |
-
|
| 513 |
|
| 514 |
-
# Create
|
| 515 |
-
with gr.Blocks(title="YouTube
|
| 516 |
-
.gradio-container { max-width: 1200px; margin: auto; }
|
| 517 |
-
footer { visibility: hidden }
|
| 518 |
-
""") as demo:
|
| 519 |
-
|
| 520 |
gr.Markdown("""
|
| 521 |
# π¬ YouTube Comment Sentiment Analyzer
|
| 522 |
|
| 523 |
-
**
|
| 524 |
-
|
| 525 |
-
### β¨ Features:
|
| 526 |
-
- π Extract real comments using official YouTube API
|
| 527 |
-
- π Automatic language detection (English/Urdu)
|
| 528 |
-
- π Sentiment analysis (Positive/Negative/Neutral)
|
| 529 |
-
- π Emoji extraction and counting
|
| 530 |
-
- π Interactive visualizations
|
| 531 |
-
- π Identify top engaging comments
|
| 532 |
|
| 533 |
-
|
| 534 |
-
1. Paste a YouTube video URL below
|
| 535 |
-
2. Click **Analyze Video**
|
| 536 |
-
3. Wait 30-60 seconds for analysis
|
| 537 |
""")
|
| 538 |
|
| 539 |
with gr.Row():
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
label="YouTube Video URL",
|
| 543 |
-
placeholder="https://www.youtube.com/watch?v=VIDEO_ID or https://youtu.be/VIDEO_ID",
|
| 544 |
-
lines=1,
|
| 545 |
-
show_label=True
|
| 546 |
-
)
|
| 547 |
-
with gr.Column(scale=1):
|
| 548 |
-
analyze_btn = gr.Button("π Analyze Video", variant="primary", size="lg")
|
| 549 |
|
| 550 |
-
gr.Markdown("
|
| 551 |
-
|
| 552 |
-
# Statistics output
|
| 553 |
-
stats_output = gr.Markdown("### π Enter a YouTube URL above and click 'Analyze Video' to start...")
|
| 554 |
-
|
| 555 |
-
gr.Markdown("### π Visualizations")
|
| 556 |
|
| 557 |
with gr.Row():
|
| 558 |
-
|
| 559 |
-
|
| 560 |
|
| 561 |
with gr.Row():
|
| 562 |
-
|
| 563 |
-
wordcloud_plot = gr.Plot(label="Word Cloud
|
| 564 |
|
| 565 |
with gr.Row():
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
# Set up click handler
|
| 569 |
-
analyze_btn.click(
|
| 570 |
-
fn=analyze_youtube_video,
|
| 571 |
-
inputs=[video_url],
|
| 572 |
-
outputs=[stats_output, sentiment_pie, language_pie, sentiment_bar, top_comments_table, wordcloud_plot]
|
| 573 |
-
)
|
| 574 |
-
|
| 575 |
-
gr.Markdown("""
|
| 576 |
-
---
|
| 577 |
-
### π Important Information
|
| 578 |
-
|
| 579 |
-
**YouTube API Free Tier:**
|
| 580 |
-
- 10,000 units per day (free)
|
| 581 |
-
- Each analysis uses ~150 units
|
| 582 |
-
- You can analyze ~66 videos per day for free
|
| 583 |
-
|
| 584 |
-
**Setup Instructions:**
|
| 585 |
-
1. Get your free API key from [Google Cloud Console](https://console.cloud.google.com/)
|
| 586 |
-
2. Enable **YouTube Data API v3**
|
| 587 |
-
3. Add the key as `GoogleAPIKey` in **Settings β Repository Secrets**
|
| 588 |
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
- Comments must be in English or Roman Urdu script
|
| 592 |
-
- Videos with disabled comments will not work
|
| 593 |
-
|
| 594 |
-
**Technical Details:**
|
| 595 |
-
- Sentiment Analysis: TextBlob
|
| 596 |
-
- Language Detection: langdetect + Unicode range detection
|
| 597 |
-
- Emoji Support: emoji library
|
| 598 |
-
- Visualization: Matplotlib & Seaborn
|
| 599 |
-
""")
|
| 600 |
|
| 601 |
-
# Launch the app
|
| 602 |
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
+
YouTube Comment Sentiment Analyzer - WITH PROPER URDU SUPPORT
|
| 4 |
+
Uses XLM-RoBERTa model for Roman Urdu sentiment
|
| 5 |
"""
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
import pandas as pd
|
| 9 |
import numpy as np
|
| 10 |
import matplotlib
|
| 11 |
+
matplotlib.use('Agg')
|
| 12 |
import matplotlib.pyplot as plt
|
| 13 |
import seaborn as sns
|
| 14 |
import re
|
|
|
|
| 22 |
import warnings
|
| 23 |
warnings.filterwarnings('ignore')
|
| 24 |
|
| 25 |
+
# For Urdu sentiment analysis
|
| 26 |
+
from transformers import pipeline
|
| 27 |
+
|
| 28 |
+
# For emoji
|
| 29 |
try:
|
| 30 |
import emoji
|
| 31 |
EMOJI_AVAILABLE = True
|
| 32 |
except ImportError:
|
| 33 |
EMOJI_AVAILABLE = False
|
| 34 |
|
|
|
|
| 35 |
DetectorFactory.seed = 0
|
|
|
|
|
|
|
| 36 |
plt.style.use('seaborn-v0_8-darkgrid')
|
| 37 |
sns.set_palette("husl")
|
| 38 |
|
|
|
|
| 39 |
YOUTUBE_API_KEY = os.environ.get("GoogleAPIKey")
|
| 40 |
|
| 41 |
+
# Load the Roman Urdu sentiment model once, at import time, so every request
# reuses the same pipeline object. Loading is best-effort: on any failure the
# app keeps running and falls back to keyword matching.
print("Loading Urdu sentiment model...")
try:
    urdu_sentiment_pipeline = pipeline(
        "text-classification",
        model="Khubaib01/roman-urdu-sentiment-xlm-r",
        truncation=True,
        device=-1,  # NOTE(review): -1 is the transformers convention for CPU -- confirm
    )
    URDU_MODEL_AVAILABLE = True
    print("✅ Urdu sentiment model loaded successfully")
except Exception as e:
    # Keep the name defined so later references never raise NameError.
    urdu_sentiment_pipeline = None
    print(f"⚠️ Could not load Urdu model: {e}")
    print("Will use enhanced Urdu keyword matching as fallback")
    URDU_MODEL_AVAILABLE = False
|
| 56 |
+
|
| 57 |
+
# Urdu positive and negative keywords for fallback
# NOTE(review): the non-ASCII entries below look mis-encoded in this copy of
# the file (UTF-8 read through a Greek code page). Re-enter them from the
# original UTF-8 source before relying on Urdu-script matching.
URDU_POSITIVE_KEYWORDS = [
    'zinda bad', 'Ψ²ΩΨ―Ω Ψ¨Ψ§Ψ―', 'long live',
    'nice', 'good', 'great', 'best', 'love', 'like', 'support',
    'ΨΩ', 'truth', 'Ψ΅ΨΫΨ', 'correct',
    'ΩΎΨ§Ϊ©Ψ³ΨͺΨ§Ω', 'pakistan', 'ΩΨ§Ψ¦Ψ―', 'leader',
    'ΨͺΨΨ±ΫΪ©', 'movement', 'Ψ§ΩΨ³Ψ§ΩΫ', 'human'
]

URDU_NEGATIVE_KEYWORDS = [
    'bad', 'Ψ¨Ψ±Ψ§', 'wrong', 'ΨΊΩΨ·', 'hate', 'ΩΩΨ±Ψͺ',
    'corrupt', 'Ϊ©Ψ±ΩΎΩΉ', 'false', 'Ψ¬ΪΎΩΩΉ', 'liar', 'Ψ¬ΪΎΩΩΉΨ§'
]

# "zinda bad" / "zindabad" ("long live ...") is a strongly positive slogan
# even though it contains the English word "bad".
_ZINDABAD_PATTERN = re.compile(r'(?<!\w)zinda\s*bad(?!\w)')


def _count_keyword_hits(text, keywords):
    """Return how many of *keywords* occur in *text* as whole words/phrases."""
    hits = 0
    for keyword in keywords:
        # Lookarounds instead of \b so keywords that start or end with a
        # non-word character are still anchored correctly.
        pattern = rf'(?<!\w){re.escape(keyword.lower())}(?!\w)'
        if re.search(pattern, text):
            hits += 1
    return hits


def analyze_urdu_sentiment_enhanced(text):
    """Classify Urdu / Roman Urdu text with a keyword-count heuristic.

    Used as the fallback when the transformer model is unavailable or fails.

    Args:
        text: Comment text. Non-string or empty input is treated as neutral.

    Returns:
        tuple: ``(label, score)`` where label is 'Positive', 'Negative' or
        'Neutral'. The score is a confidence-like value: 0.5 for neutral,
        otherwise 0.5 plus 0.1 per matched keyword, capped at 0.9.
    """
    if not isinstance(text, str) or not text:
        return 'Neutral', 0.5

    text_lower = text.lower()

    has_slogan = (
        _ZINDABAD_PATTERN.search(text_lower) is not None
        or 'Ψ²ΩΨ―Ω Ψ¨Ψ§Ψ―' in text_lower
    )

    positive_score = _count_keyword_hits(text_lower, URDU_POSITIVE_KEYWORDS)
    # Remove the slogan before counting negatives so its "bad" is not counted
    # against the comment.
    negative_score = _count_keyword_hits(
        _ZINDABAD_PATTERN.sub(' ', text_lower), URDU_NEGATIVE_KEYWORDS
    )

    if has_slogan:
        positive_score += 3  # Strong positive

    if positive_score > negative_score:
        return 'Positive', min(0.9, 0.5 + (positive_score * 0.1))
    if negative_score > positive_score:
        return 'Negative', min(0.9, 0.5 + (negative_score * 0.1))
    return 'Neutral', 0.5
|
| 97 |
|
| 98 |
class YouTubeSentimentAnalyzer:
|
|
|
|
|
|
|
| 99 |
def __init__(self):
    """Create the YouTube Data API client if an API key is configured.

    ``self.youtube`` stays ``None`` when no key is set or when building the
    client fails; the failure is printed rather than raised.
    """
    self.youtube = None
    if YOUTUBE_API_KEY:
        try:
            self.youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
            print("✅ YouTube API initialized")
        except Exception as e:
            print(f"❌ API Error: {e}")
|
| 107 |
|
| 108 |
def extract_comments(self, video_url, max_comments=150):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
if not self.youtube:
|
| 110 |
+
return [], "YouTube API not configured."
|
| 111 |
|
| 112 |
try:
|
|
|
|
| 113 |
if 'v=' in video_url:
|
| 114 |
video_id = video_url.split('v=')[-1].split('&')[0]
|
| 115 |
elif 'youtu.be/' in video_url:
|
|
|
|
| 117 |
else:
|
| 118 |
video_id = video_url
|
| 119 |
|
|
|
|
|
|
|
| 120 |
comments = []
|
| 121 |
next_page_token = None
|
| 122 |
|
|
|
|
| 136 |
'author': comment_data.get('authorDisplayName', 'Anonymous'),
|
| 137 |
'text': comment_data.get('textDisplay', ''),
|
| 138 |
'likes': comment_data.get('likeCount', 0),
|
| 139 |
+
'time': comment_data.get('publishedAt', '')
|
|
|
|
| 140 |
})
|
| 141 |
|
| 142 |
next_page_token = response.get('nextPageToken')
|
| 143 |
if not next_page_token:
|
| 144 |
break
|
| 145 |
|
|
|
|
| 146 |
return comments, None
|
|
|
|
| 147 |
except HttpError as e:
|
| 148 |
if e.resp.status == 403:
|
| 149 |
+
return [], "Quota exceeded. Try again tomorrow."
|
| 150 |
+
return [], str(e)
|
|
|
|
|
|
|
|
|
|
| 151 |
except Exception as e:
|
| 152 |
+
return [], str(e)
|
| 153 |
|
| 154 |
def clean_text(self, text):
    """Normalize a raw comment into plain, single-spaced text.

    Steps, in order: replace HTML tags with a space, decode HTML entities,
    remove URLs, collapse whitespace.

    Args:
        text: Raw comment text. Presumably the API's display text, which may
            contain markup such as ``<br>`` and entities such as ``&#39;``
            -- verify against the caller.

    Returns:
        The cleaned string, or ``""`` for empty or non-string input.
    """
    import html  # local import: this block is the only user

    if not text or not isinstance(text, str):
        return ""

    # Tags become a space, not nothing, so "a<br>b" does not collapse to "ab".
    # Done before URL removal so a link inside href="..." cannot swallow the
    # visible link text.
    text = re.sub(r'<.*?>', ' ', text)
    # Decode entities (&amp;, &#39;, ...) so words reach the analyzers intact.
    text = html.unescape(text)
    text = re.sub(r'http\S+|www\S+', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
|
| 161 |
|
| 162 |
def detect_language(self, text):
    """Classify a cleaned comment as 'urdu', 'english', 'other' or 'unknown'.

    Order of checks:
      1. Non-string input or fewer than 3 characters -> 'unknown'.
      2. Any character in the Arabic-script ranges -> 'urdu'.
      3. A Roman Urdu marker as a whole word -> 'urdu'.
      4. Otherwise defer to ``detect``: 'english' for 'en', else 'other'.

    Args:
        text: Cleaned comment text.

    Returns:
        One of 'urdu', 'english', 'other', 'unknown'.
    """
    if not isinstance(text, str) or len(text) < 3:
        return 'unknown'

    if re.search(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]', text):
        return 'urdu'

    # Whole-word match only: an unanchored pattern hits "se" in "because" and
    # "ki" in "kind", which labels ordinary English as Urdu. A bare "bad" is
    # not a marker (it is an English word); it counts only inside the slogan
    # "zinda bad" / "zindabad".
    if re.search(r'\b(?:zinda(?:\s*bad)?|zindabad|hai|ka|ki|ko|se|mein)\b',
                 text.lower()):
        return 'urdu'  # Roman Urdu detection

    try:
        lang = detect(text)
    except Exception:
        # Detection is best-effort; any failure means we cannot tell.
        return 'unknown'
    return 'english' if lang == 'en' else 'other'
|
| 174 |
|
| 175 |
+
def analyze_sentiment_urdu(self, text):
    """Analyze Urdu/Roman Urdu sentiment.

    Uses the transformer pipeline when it loaded successfully. Falls back to
    the keyword heuristic when the model is unavailable or when the call
    fails for any reason.

    Args:
        text: Cleaned comment text.

    Returns:
        tuple: ``(label, score)`` with label 'Positive', 'Negative' or
        'Neutral'. On the model path, score is the classifier's score for
        its predicted label.
    """
    if not URDU_MODEL_AVAILABLE:
        return analyze_urdu_sentiment_enhanced(text)

    try:
        result = urdu_sentiment_pipeline(text)[0]
        # Compare case-insensitively: label casing depends on the model config.
        label = str(result['label']).lower()
        score = result['score']
    except Exception:
        return analyze_urdu_sentiment_enhanced(text)

    # NOTE(review): the LABEL_0 -> Positive / LABEL_1 -> Negative mapping is
    # carried over unchanged -- confirm against the model's id2label config.
    if label in ('label_0', 'positive'):
        return 'Positive', score
    if label in ('label_1', 'negative'):
        return 'Negative', score
    return 'Neutral', score
|
|
|
|
|
|
|
| 191 |
|
| 192 |
+
def analyze_sentiment_english(self, text):
    """Classify English text by TextBlob polarity.

    Polarity above 0.1 is 'Positive', below -0.1 is 'Negative', anything in
    between is 'Neutral'.

    Args:
        text: Cleaned comment text.

    Returns:
        tuple: ``(label, polarity)``. ``('Neutral', 0.0)`` if the analysis
        fails for any reason.
    """
    try:
        polarity = TextBlob(text).sentiment.polarity
    except Exception:
        # Best-effort: one bad comment must not abort the batch. A bare
        # ``except:`` would also swallow KeyboardInterrupt/SystemExit.
        return 'Neutral', 0.0

    if polarity > 0.1:
        return 'Positive', polarity
    if polarity < -0.1:
        return 'Negative', polarity
    return 'Neutral', polarity
|
| 203 |
|
| 204 |
def process_comments(self, comments):
    """Clean, language-tag and sentiment-score a list of comments.

    Args:
        comments: List of comment dicts; each must have a 'text' key.

    Returns:
        pandas.DataFrame with the input columns plus 'clean_text',
        'language', 'sentiment' and 'polarity'. When the emoji library is
        available it also has 'emojis', 'emoji_count' and 'has_emoji'.
        Comments whose cleaned text has 2 or fewer characters are dropped.
        An empty frame is returned for empty input or when nothing survives
        the filter.
    """
    if not comments:
        return pd.DataFrame()

    df = pd.DataFrame(comments)
    df['clean_text'] = df['text'].apply(self.clean_text)
    # .copy() so the column assignments below write to an independent frame
    # rather than to a slice of the unfiltered one.
    df = df[df['clean_text'].str.len() > 2].copy()

    if df.empty:
        return df

    df['language'] = df['clean_text'].apply(self.detect_language)

    # English goes to TextBlob; everything else (urdu/other/unknown) goes to
    # the Urdu analyzer.
    results = [
        self.analyze_sentiment_english(text) if language == 'english'
        else self.analyze_sentiment_urdu(text)
        for text, language in zip(df['clean_text'], df['language'])
    ]
    df['sentiment'] = [label for label, _ in results]
    # NOTE(review): this column mixes two scales -- TextBlob polarity for
    # English rows and a confidence-style score for the others.
    df['polarity'] = [score for _, score in results]

    if EMOJI_AVAILABLE:
        # Emojis are taken from the raw text, not the cleaned text.
        df['emojis'] = df['text'].apply(
            lambda raw: [ch for ch in str(raw) if emoji.is_emoji(ch)]
        )
        df['emoji_count'] = df['emojis'].apply(len)
        df['has_emoji'] = df['emoji_count'] > 0

    return df
|
| 236 |
|
|
|
|
| 237 |
def create_visualizations(df):
    """Build the five matplotlib figures shown in the UI.

    Args:
        df: Analysis DataFrame with 'sentiment', 'language', 'likes',
            'author', 'text' and 'clean_text' columns.

    Returns:
        tuple: ``(sentiment_pie, language_pie, sentiment_bar, top_table,
        word_cloud)`` as matplotlib Figures, or five ``None`` values when
        *df* is empty.
    """
    if len(df) == 0:
        return None, None, None, None, None

    # NOTE(review): figures made with plt.subplots stay registered with
    # pyplot until closed; in a long-running server consider closing them
    # once the UI has rendered them.
    sentiment_colors = {'Positive': '#2ecc71', 'Negative': '#e74c3c', 'Neutral': '#95a5a6'}
    counts = df['sentiment'].value_counts()
    count_colors = [sentiment_colors.get(label, '#95a5a6') for label in counts.index]

    # Sentiment Pie
    fig1, ax1 = plt.subplots(figsize=(10, 8))
    ax1.pie(counts.values, labels=counts.index, autopct='%1.1f%%',
            colors=count_colors, startangle=90)
    ax1.set_title('Sentiment Distribution', fontsize=16, fontweight='bold')
    fig1.tight_layout()

    # Language Pie
    fig2, ax2 = plt.subplots(figsize=(10, 8))
    lang_counts = df['language'].value_counts()
    lang_labels = {'english': 'English', 'urdu': 'Urdu/Roman Urdu', 'other': 'Other'}
    ax2.pie(lang_counts.values,
            labels=[lang_labels.get(code, code) for code in lang_counts.index],
            autopct='%1.1f%%', startangle=90)
    ax2.set_title('Language Distribution', fontsize=16, fontweight='bold')
    fig2.tight_layout()

    # Sentiment Bar
    fig3, ax3 = plt.subplots(figsize=(10, 6))
    rects = ax3.bar(counts.index, counts.values, color=count_colors)
    for rect in rects:
        height = rect.get_height()
        # Anchor the label at the bar top. A fixed data-unit offset pushes
        # it far outside the axes when the counts are small.
        ax3.text(rect.get_x() + rect.get_width() / 2., height, f'{int(height)}',
                 ha='center', va='bottom')
    ax3.margins(y=0.1)  # headroom so the label above the tallest bar fits
    ax3.set_title('Sentiment Bar Chart', fontsize=14, fontweight='bold')
    ax3.grid(axis='y', alpha=0.3)
    fig3.tight_layout()

    # Top Comments Table
    fig4, ax4 = plt.subplots(figsize=(14, 8))
    ax4.axis('tight')
    ax4.axis('off')
    # .copy() so truncating the text does not write into a slice of df.
    top = df.nlargest(10, 'likes')[['author', 'text', 'likes', 'sentiment']].copy()
    top['text'] = top['text'].apply(
        lambda raw: str(raw)[:70] + '...' if len(str(raw)) > 70 else str(raw)
    )
    table = ax4.table(cellText=top.values,
                      colLabels=['Author', 'Comment', 'Likes', 'Sentiment'],
                      cellLoc='left', loc='center', colWidths=[0.15, 0.55, 0.1, 0.1])
    table.auto_set_font_size(False)
    table.set_fontsize(9)
    # Row 0 is the header, so data rows start at 1; column 3 is Sentiment.
    for row, sent in enumerate(top['sentiment'].values, start=1):
        if sent == 'Positive':
            table[(row, 3)].set_facecolor('#90EE90')
        elif sent == 'Negative':
            table[(row, 3)].set_facecolor('#FFB6C1')
    ax4.set_title('Top 10 Engaging Comments', fontsize=16, fontweight='bold')
    fig4.tight_layout()

    # Word Cloud
    fig5, ax5 = plt.subplots(figsize=(12, 6))
    all_text = ' '.join(df['clean_text'].tolist())
    if all_text.strip():
        try:
            cloud = WordCloud(width=800, height=400, background_color='white',
                              max_words=100).generate(all_text)
            ax5.imshow(cloud, interpolation='bilinear')
            ax5.axis('off')
            ax5.set_title('Word Cloud', fontsize=14, fontweight='bold')
        except Exception:
            # Best-effort: a word-cloud failure must not lose the other plots.
            ax5.text(0.5, 0.5, 'Could not generate word cloud',
                     ha='center', va='center', transform=ax5.transAxes)
    else:
        # Show an explicit message instead of an empty pair of axes.
        ax5.axis('off')
        ax5.text(0.5, 0.5, 'No text available for word cloud',
                 ha='center', va='center', transform=ax5.transAxes)
    fig5.tight_layout()

    return fig1, fig2, fig3, fig4, fig5
|
|
|
|
| 306 |
|
| 307 |
def analyze_youtube_video(video_url, progress=gr.Progress()):
    """Run the full analysis for one video; used as the Gradio click handler.

    Args:
        video_url: YouTube video URL (or bare video ID) entered by the user.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        tuple: ``(markdown_text, sentiment_pie, language_pie, sentiment_bar,
        top_comments_table, word_cloud)``. On any failure the first item is
        an error message and the five figure slots are ``None``.
    """
    no_plots = (None,) * 5

    if not video_url or not video_url.strip():
        return ("❌ Enter a valid URL", *no_plots)

    if not YOUTUBE_API_KEY:
        return ("❌ Add GoogleAPIKey to Secrets", *no_plots)

    try:
        progress(0.1, desc="Initializing...")
        analyzer = YouTubeSentimentAnalyzer()

        progress(0.2, desc="Fetching comments...")
        # Strip so stray whitespace from a pasted URL cannot end up in the
        # extracted video ID.
        comments, error = analyzer.extract_comments(video_url.strip(), max_comments=150)

        if error:
            return (f"❌ {error}", *no_plots)

        if not comments:
            return ("❌ No comments found", *no_plots)

        progress(0.5, desc="Analyzing sentiment...")
        df = analyzer.process_comments(comments)

        if len(df) == 0:
            return ("❌ No valid comments", *no_plots)

        progress(0.7, desc="Generating statistics...")

        total = len(df)  # > 0 here, so the percentages below cannot divide by zero

        def share(column, value):
            """Format 'count (percent%)' for rows where df[column] == value."""
            count = int((df[column] == value).sum())
            return f"{count} ({count / total * 100:.1f}%)"

        stats = "\n".join([
            "",
            "## ✅ Analysis Complete!",
            "",
            "### 📊 Results",
            f"- **Total Comments:** {total}",
            f"- **Positive:** {share('sentiment', 'Positive')}",
            f"- **Negative:** {share('sentiment', 'Negative')}",
            f"- **Neutral:** {share('sentiment', 'Neutral')}",
            f"- **Urdu/Roman Urdu:** {share('language', 'urdu')}",
            f"- **English:** {share('language', 'english')}",
            "",
            '**Note:** Urdu phrases like "Khan zinda bad" are now correctly classified as Positive!',
            "",
        ])

        progress(0.9, desc="Creating visualizations...")
        pie, lang_pie, bar, top_table, wc = create_visualizations(df)

        progress(1.0, desc="Complete!")
        return stats, pie, lang_pie, bar, top_table, wc

    except Exception as e:
        # Keep the full traceback in the server log; the user sees only the
        # short message.
        import traceback
        traceback.print_exc()
        return (f"❌ Error: {str(e)}", *no_plots)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
|
| 365 |
+
# Create interface
# Layout: header text, URL box + button, statistics markdown, then three rows
# of plots. The button passes the URL to analyze_youtube_video and spreads
# its six return values over the six output components, in order.
with gr.Blocks(title="YouTube Sentiment Analyzer", theme=gr.themes.Soft()) as demo:
    # The Markdown body is kept at column 0 so no leading indentation reaches
    # the renderer.
    gr.Markdown("""
# 🎬 YouTube Comment Sentiment Analyzer

**Now with proper Urdu/Roman Urdu support!**

Phrases like "Khan zinda bad" (Long live Khan) are correctly classified as **Positive** ✅
""")

    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...", scale=4)
        analyze_btn = gr.Button("Analyze", variant="primary", scale=1)

    stats_output = gr.Markdown("### Enter a URL above")

    with gr.Row():
        sentiment_plot = gr.Plot(label="Sentiment Distribution")
        language_plot = gr.Plot(label="Language Distribution")

    with gr.Row():
        bar_plot = gr.Plot(label="Sentiment Bar Chart")
        wordcloud_plot = gr.Plot(label="Word Cloud")

    with gr.Row():
        top_plot = gr.Plot(label="Top Comments")

    # Output order must match the handler's return order:
    # (stats, sentiment pie, language pie, bar chart, top table, word cloud).
    analyze_btn.click(analyze_youtube_video, [url_input],
                      [stats_output, sentiment_plot, language_plot, bar_plot, top_plot, wordcloud_plot])

demo.launch(server_name="0.0.0.0", server_port=7860)
|