{
  "model_name": "detector-clickbait-br-model",
  "version": "1.0.0",
  "model_type": "sklearn",
  "task": "text-classification",
  "framework": "scikit-learn",
  "sklearn_version": "1.7.2",
  "language": "pt",
  "license": "mit",
  "pipeline": {
    "preprocessor": "tfidf_vectorizer.pkl",
    "scaler": "scaler.pkl",
    "model": "melhor_modelo.pkl"
  },
  "model_architecture": {
    "type": "Random Forest Classifier (Optimized)",
    "base_model": "RandomForestClassifier",
    "optimization": {
      "method": "RandomizedSearchCV",
      "n_iter": 50,
      "cv_folds": 5,
      "scoring": "f1",
      "random_state": 42
    },
    "hyperparameters": {
      "note": "Optimized via RandomizedSearchCV",
      "search_space": {
        "n_estimators": [100, 200, 300, 500],
        "max_depth": [null, 10, 20, 30, 50],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
        "max_features": ["sqrt", "log2", null],
        "bootstrap": [true, false]
      }
    },
    "best_params": "See model object for details"
  },
  "features": {
    "tfidf": {
      "max_features": 5000,
      "ngram_range": [1, 2],
      "min_df": 2,
      "vocabulary_size": 200,
      "analyzer": "word"
    },
    "numeric": [
      "word_count",
      "char_count",
      "exclamation_count",
      "question_count"
    ],
    "total_features": 204
  },
  "dataset": {
    "total_samples": 9532,
    "train_samples": 7625,
    "test_samples": 1907,
    "train_test_split": 0.8,
    "random_state": 42,
    "stratified": true,
    "class_distribution": {
      "non_clickbait": {
        "count": 4457,
        "percentage": 46.76
      },
      "clickbait": {
        "count": 5075,
        "percentage": 53.24
      }
    }
  },
  "performance": {
    "test_set": {
      "accuracy": 0.9706,
      "precision": 0.9829,
      "recall": 0.9616,
      "f1_score": 0.9721,
      "note": "Best performing model among 9 tested"
    },
    "cross_validation": {
      "cv_folds": 5,
      "cv_strategy": "StratifiedKFold",
      "note": "Used during RandomizedSearchCV"
    }
  },
  "classes": {
    "0": "Não-Clickbait",
    "1": "Clickbait"
  },
  "preprocessing": {
    "text": {
      "lowercase": true,
      "remove_stopwords": true,
      "stopwords_language": "portuguese",
      "stemming": false,
      "remove_punctuation": false,
      "note": "Punctuation preserved for feature extraction"
    },
    "numeric_features": {
      "scaler": "StandardScaler",
      "features_scaled": [
        "word_count",
        "char_count",
        "exclamation_count",
        "question_count"
      ]
    }
  },
  "model_comparison": {
    "models_tested": 9,
    "ranking_by_f1": [
      {
        "rank": 1,
        "model": "RF Otimizado",
        "f1_score": 0.9721,
        "accuracy": 0.9706
      },
      {
        "rank": 2,
        "model": "Random Forest",
        "f1_score": 0.9715,
        "accuracy": 0.9701
      },
      {
        "rank": 3,
        "model": "Stacking",
        "f1_score": 0.9698,
        "accuracy": 0.9680
      },
      {
        "rank": 4,
        "model": "SVM",
        "f1_score": 0.9685,
        "accuracy": 0.9670
      },
      {
        "rank": 5,
        "model": "Voting (Soft)",
        "f1_score": 0.9679,
        "accuracy": 0.9664
      },
      {
        "rank": 6,
        "model": "Voting (Hard)",
        "f1_score": 0.9673,
        "accuracy": 0.9659
      },
      {
        "rank": 7,
        "model": "Regressão Logística",
        "f1_score": 0.9611,
        "accuracy": 0.9596
      },
      {
        "rank": 8,
        "model": "Naive Bayes",
        "f1_score": 0.9159,
        "accuracy": 0.9077
      },
      {
        "rank": 9,
        "model": "Baseline",
        "f1_score": 0.4678,
        "accuracy": 0.0000
      }
    ]
  },
  "inference": {
    "input_format": "string (headline text)",
    "output_format": {
      "prediction": "string (Clickbait or Não-Clickbait)",
      "probabilities": {
        "non_clickbait": "float",
        "clickbait": "float"
      }
    },
    "preprocessing_required": true
  },
  "training_info": {
    "date": "2025",
    "training_time": "~22 minutes (optimization included)",
    "hardware": "CPU",
    "random_state": 42
  }
}