{
  "model_name": "detector-clickbait-br-model",
  "version": "1.0.0",
  "model_type": "sklearn",
  "task": "text-classification",
  "framework": "scikit-learn",
  "sklearn_version": "1.7.2",
  "language": "pt",
  "license": "mit",
  "pipeline": {
    "preprocessor": "tfidf_vectorizer.pkl",
    "scaler": "scaler.pkl",
    "model": "melhor_modelo.pkl"
  },
  "model_architecture": {
    "type": "Random Forest Classifier (Optimized)",
    "base_model": "RandomForestClassifier",
    "optimization": {
      "method": "RandomizedSearchCV",
      "n_iter": 50,
      "cv_folds": 5,
      "scoring": "f1",
      "random_state": 42
    },
    "hyperparameters": {
      "note": "Optimized via RandomizedSearchCV",
      "search_space": {
        "n_estimators": [100, 200, 300, 500],
        "max_depth": ["None", 10, 20, 30, 50],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
        "max_features": ["sqrt", "log2", "None"],
        "bootstrap": [true, false]
      }
    },
    "best_params": "See model object for details"
  },
  "features": {
    "tfidf": {
      "max_features": 5000,
      "ngram_range": [1, 2],
      "min_df": 2,
      "vocabulary_size": 200,
      "analyzer": "word"
    },
    "numeric": [
      "word_count",
      "char_count",
      "exclamation_count",
      "question_count"
    ],
    "total_features": 204
  },
  "dataset": {
    "total_samples": 9532,
    "train_samples": 7625,
    "test_samples": 1907,
    "train_test_split": 0.8,
    "random_state": 42,
    "stratified": true,
    "class_distribution": {
      "non_clickbait": {
        "count": 4457,
        "percentage": 46.76
      },
      "clickbait": {
        "count": 5075,
        "percentage": 53.24
      }
    }
  },
  "performance": {
    "test_set": {
      "accuracy": 0.9706,
      "precision": 0.9829,
      "recall": 0.9616,
      "f1_score": 0.9721,
      "note": "Best performing model among 9 tested"
    },
    "cross_validation": {
      "cv_folds": 5,
      "cv_strategy": "StratifiedKFold",
      "note": "Used during RandomizedSearchCV"
    }
  },
  "classes": {
    "0": "Não-Clickbait",
    "1": "Clickbait"
  },
  "preprocessing": {
    "text": {
      "lowercase": true,
      "remove_stopwords": true,
      "stopwords_language": "portuguese",
      "stemming": false,
      "remove_punctuation": false,
      "note": "Punctuation preserved for feature extraction"
    },
    "numeric_features": {
      "scaler": "StandardScaler",
      "features_scaled": [
        "word_count",
        "char_count",
        "exclamation_count",
        "question_count"
      ]
    }
  },
  "model_comparison": {
    "models_tested": 9,
    "ranking_by_f1": [
      {
        "rank": 1,
        "model": "RF Otimizado",
        "f1_score": 0.9721,
        "accuracy": 0.9706
      },
      {
        "rank": 2,
        "model": "Random Forest",
        "f1_score": 0.9715,
        "accuracy": 0.9701
      },
      {
        "rank": 3,
        "model": "Stacking",
        "f1_score": 0.9698,
        "accuracy": 0.9680
      },
      {
        "rank": 4,
        "model": "SVM",
        "f1_score": 0.9685,
        "accuracy": 0.9670
      },
      {
        "rank": 5,
        "model": "Voting (Soft)",
        "f1_score": 0.9679,
        "accuracy": 0.9664
      },
      {
        "rank": 6,
        "model": "Voting (Hard)",
        "f1_score": 0.9673,
        "accuracy": 0.9659
      },
      {
        "rank": 7,
        "model": "Regressão Logística",
        "f1_score": 0.9611,
        "accuracy": 0.9596
      },
      {
        "rank": 8,
        "model": "Naive Bayes",
        "f1_score": 0.9159,
        "accuracy": 0.9077
      },
      {
        "rank": 9,
        "model": "Baseline",
        "f1_score": 0.4678,
        "accuracy": 0.0000
      }
    ]
  },
  "inference": {
    "input_format": "string (headline text)",
    "output_format": {
      "prediction": "string (Clickbait or Não-Clickbait)",
      "probabilities": {
        "non_clickbait": "float",
        "clickbait": "float"
      }
    },
    "preprocessing_required": true
  },
  "training_info": {
    "date": "2025",
    "training_time": "~22 minutes (optimization included)",
    "hardware": "CPU",
    "random_state": 42
  }
}