maryangel101 committed on
Commit
8d2ec7a
·
0 Parent(s):

Initial commit with calculator app and CI/CD workflow

Browse files
.github/scripts/anomaly_detector.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import sys
4
+ import json
5
+
6
def download_workflow_logs(run_id, token):
    """Download and unpack the logs of a specific workflow run.

    The GitHub "download workflow run logs" endpoint delivers a ZIP archive
    with one text file per job/step, so the archive is unpacked and its
    members are concatenated into one string (the same approach
    ``collect_logs.py`` uses when it builds the training data).

    Args:
        run_id: Identifier of the workflow run whose logs are wanted.
        token: GitHub token sent in the ``Authorization`` header.

    Returns:
        The concatenated log text, or ``None`` when the repository name is
        missing, the request fails, or the payload is not a ZIP archive.
    """
    # Local imports keep this function self-contained.
    import io
    import zipfile

    headers = {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3+json'
    }

    # Get repository info from environment
    repo_full_name = os.environ.get('GITHUB_REPOSITORY')
    if not repo_full_name:
        print("Failed to download logs: GITHUB_REPOSITORY is not set")
        return None

    # Download logs
    url = f'https://api.github.com/repos/{repo_full_name}/actions/runs/{run_id}/logs'
    try:
        # A timeout keeps a stalled connection from hanging the CI job.
        response = requests.get(url, headers=headers, timeout=60)
    except requests.RequestException as e:
        print(f"Failed to download logs: {e}")
        return None

    if response.status_code != 200:
        print(f"Failed to download logs: {response.status_code}")
        return None

    try:
        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            # errors='replace' so a stray non-UTF-8 byte cannot abort the run.
            parts = [
                archive.read(member).decode('utf-8', errors='replace') + "\n"
                for member in archive.namelist()
            ]
    except zipfile.BadZipFile:
        print("Failed to download logs: response is not a valid zip archive")
        return None

    return "".join(parts)
25
+
26
def analyze_logs_with_model(log_content, model_url):
    """Send logs to the model API for analysis.

    Args:
        log_content: Raw log text to classify.
        model_url: Base URL of the model service; ``/predict`` is appended.

    Returns:
        The decoded JSON body of a 200 response, or ``None`` when the request
        fails, the status is not 200, or the body is not valid JSON.
    """
    # Tolerate a base URL configured with a trailing slash, which would
    # otherwise produce a "//predict" path.
    endpoint = f"{model_url.rstrip('/')}/predict"

    try:
        response = requests.post(
            endpoint,
            json={
                "log_content": log_content,
                "include_explanation": True
            },
            timeout=30
        )
    except requests.RequestException as e:
        print(f"Failed to call model API: {e}")
        return None

    if response.status_code != 200:
        print(f"Model API error: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as e:
        # A 200 response with a non-JSON body (e.g. a proxy error page).
        print(f"Failed to call model API: {e}")
        return None
47
+
48
def main():
    """Analyze one workflow run and exit non-zero when it looks anomalous.

    Reads GITHUB_TOKEN, WORKFLOW_RUN_ID and MODEL_API_URL from the
    environment, downloads the run's logs, sends them to the model API and
    prints the result. Exits with status 1 when a variable is missing, when
    the download or the analysis fails, or when an anomaly is reported;
    exits with status 0 otherwise.
    """
    env = os.environ
    github_token = env.get('GITHUB_TOKEN')
    workflow_run_id = env.get('WORKFLOW_RUN_ID')
    model_api_url = env.get('MODEL_API_URL')

    required = (github_token, workflow_run_id, model_api_url)
    if any(not value for value in required):
        print("Missing required environment variables")
        sys.exit(1)

    print(f"Analyzing workflow run: {workflow_run_id}")

    # Step 1: fetch the logs of the run
    logs = download_workflow_logs(workflow_run_id, github_token)
    if not logs:
        print("Failed to download logs")
        sys.exit(1)

    # Step 2: ask the model service for a verdict
    result = analyze_logs_with_model(logs, model_api_url)
    if not result:
        print("Failed to analyze logs with model")
        sys.exit(1)

    print(f"Analysis result: {json.dumps(result, indent=2)}")

    # Step 3: translate the verdict into an exit status
    if not result.get('is_anomaly', False):
        print("✅ No anomalies detected")
        sys.exit(0)

    print("🚨 ANOMALY DETECTED!")
    print(f"Confidence: {result.get('confidence', 0):.2%}")
    print(f"Anomaly Probability: {result.get('anomaly_probability', 0):.2%}")

    explanation = result.get('explanation')
    if explanation:
        print("Explanation:", explanation)

    # A non-zero status marks the workflow step as failed
    sys.exit(1)


if __name__ == "__main__":
    main()
.github/workflows/anomaly-check.yml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
name: Anomaly Detection Check

on:
  workflow_run:
    workflows: ["CI Pipeline"]
    types:
      - completed

# The job downloads another run's logs and may open an issue, so the
# GITHUB_TOKEN scopes it needs are granted explicitly.
permissions:
  actions: read
  contents: read
  issues: write

jobs:
  anomaly-check:
    runs-on: ubuntu-latest
    if: ${{ github.event.workflow_run.conclusion != 'cancelled' }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.9'

      - name: Install dependencies
        run: |
          pip install requests

      - name: Download and analyze workflow logs
        id: analyze
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
          MODEL_API_URL: ${{ secrets.MODEL_API_URL }}  # Set this in your repo secrets
        run: |
          python .github/scripts/anomaly_detector.py

      # Only react to a failure of the analysis step itself, not to a failed
      # checkout or dependency installation.
      - name: Create issue if anomaly detected
        if: ${{ failure() && steps.analyze.outcome == 'failure' }}
        uses: actions/github-script@v6
        with:
          script: |
            // Await the call so an API error fails this step visibly.
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: 'CI/CD Anomaly Detected',
              body: 'An anomaly was detected in the recent CI/CD pipeline run. Please investigate.',
              labels: ['anomaly', 'ci-cd']
            })
.github/workflows/main.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
name: CI Pipeline

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v3
      with:
        python-version: '3.9'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        # The test runner is not listed in requirements.txt, so install it here.
        pip install pytest

    - name: Run tests
      run: |
        pytest -v test_calculator.py
api_service.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re
from typing import Optional

import joblib
import numpy as np
import shap
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
9
+
10
app = FastAPI(title="CI/CD Log Anomaly Detection API")

# Global variables for model and vectorizer.
# They start as None; load_model() assigns model and vectorizer at startup,
# and the request handlers treat None as "not loaded".
model = None
vectorizer = None
# Declared for a SHAP explainer, but never assigned in the visible code.
explainer = None
16
+
17
class LogRequest(BaseModel):
    """Request body of ``POST /predict``."""

    # Raw log text to classify.
    log_content: str
    # When true, the response also carries a feature-based explanation.
    include_explanation: bool = False


class PredictionResponse(BaseModel):
    """Response body of ``POST /predict``."""

    is_anomaly: bool
    # Highest class probability reported by the model.
    confidence: float
    anomaly_probability: float
    # Optional[...] instead of a bare ``dict``: the field is None whenever no
    # explanation was requested, and a non-nullable ``dict`` annotation makes
    # that None fail response validation under Pydantic v2.
    explanation: Optional[dict] = None
26
+
27
def clean_log(log_content):
    """Normalize a raw log so run-specific details do not become features.

    Applies, in order: timestamp masking, run/job/long-number ID masking,
    home and temp path masking, and whitespace collapsing. Returns the
    result with leading and trailing whitespace removed.
    """
    # (pattern, replacement) pairs; the order matters and is applied as listed.
    substitutions = (
        (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z', '[TIMESTAMP]'),
        (r'run_\d+', 'run_ID'),
        (r'job_\d+', 'job_ID'),
        (r'\b\d{8,}\b', '[ID]'),
        (r'/home/[^/\s]+', '/home/USER'),
        (r'/tmp/[^/\s]+', '/tmp/TEMP'),
        (r'\s+', ' '),
    )

    text = log_content
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    return text.strip()
45
+
46
@app.on_event("startup")
async def load_model():
    """Load the trained model and vectorizer on startup.

    Assigns the module-level ``model`` and ``vectorizer`` from
    'anomaly_model.pkl' and 'vectorizer.pkl' in the working directory. If a
    file is missing, a warning is printed and startup continues; a global
    that was not assigned stays None.
    """
    global model, vectorizer, explainer

    try:
        # The second load only runs if the first succeeded, so a missing
        # vectorizer file leaves model set and vectorizer None.
        model = joblib.load('anomaly_model.pkl')
        vectorizer = joblib.load('vectorizer.pkl')

        # Create SHAP explainer (simplified for speed)
        # NOTE(review): only the message is printed; no explainer is built
        # and ``explainer`` stays None.
        print("Initializing SHAP explainer...")

    except FileNotFoundError:
        print("Warning: Model files not found. Please train the model first.")
60
+
61
def get_feature_importance_explanation(log_content, prediction_proba):
    """Get simple feature importance explanation.

    Vectorizes the cleaned log with the module-level ``vectorizer`` and
    reports the (at most five) features with the largest positive values in
    that vector. The weights are the vectorizer's output for this log, not
    importances taken from the classifier.

    Args:
        log_content: Raw log text that was classified.
        prediction_proba: Class-probability vector for the same log.

    Returns:
        A dict with 'top_contributing_features' and 'explanation', or a dict
        with 'error' and 'explanation' when anything fails (best effort: the
        prediction itself is still returned by the caller).
    """
    try:
        cleaned_log = clean_log(log_content)
        log_vector = vectorizer.transform([cleaned_log])

        # Get feature names and their importance
        feature_names = vectorizer.get_feature_names_out()
        feature_weights = log_vector.toarray()[0]

        # The label is identical for every feature, so compute it once. A
        # one-element probability vector (single-class model) is treated as
        # anomaly probability 0.0, matching predict_anomaly.
        if len(prediction_proba) > 1:
            anomaly_probability = prediction_proba[1]
        else:
            anomaly_probability = 0.0
        impact = 'anomaly' if anomaly_probability > 0.5 else 'normal'

        # Ten largest weights, biggest first; zero weights are dropped below.
        top_indices = np.argsort(feature_weights)[-10:][::-1]
        top_features = [
            {
                'feature': feature_names[idx],
                'weight': float(feature_weights[idx]),
                'impact': impact
            }
            for idx in top_indices
            if feature_weights[idx] > 0
        ]

        return {
            'top_contributing_features': top_features[:5],
            'explanation': "Features with higher weights contributed more to the prediction"
        }

    except Exception as e:
        return {
            'error': f"Could not generate explanation: {str(e)}",
            'explanation': "Feature importance analysis failed"
        }
93
+
94
@app.post("/predict", response_model=PredictionResponse)
async def predict_anomaly(request: LogRequest):
    """Classify a log as anomalous or normal.

    Responds with HTTP 500 when the model or vectorizer is not loaded, or
    when any step of the prediction raises.
    """
    if not (model is not None and vectorizer is not None):
        raise HTTPException(status_code=500, detail="Model not loaded. Please ensure model files exist.")

    try:
        # Same cleaning and vectorizing as at training time
        features = vectorizer.transform([clean_log(request.log_content)])

        predicted_label = model.predict(features)[0]
        class_probs = model.predict_proba(features)[0]

        result = PredictionResponse(
            is_anomaly=bool(predicted_label),
            confidence=float(max(class_probs)),
            anomaly_probability=float(class_probs[1]) if len(class_probs) > 1 else 0.0
        )

        # The explanation is attached only on request
        if request.include_explanation:
            result.explanation = get_feature_importance_explanation(request.log_content, class_probs)

        return result

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
125
+
126
@app.get("/health")
async def health_check():
    """Report service status and whether model and vectorizer are loaded."""
    payload = {"status": "healthy"}
    payload["model_loaded"] = model is not None
    payload["vectorizer_loaded"] = vectorizer is not None
    return payload
134
+
135
@app.get("/")
async def root():
    """Describe the API: name, version and available endpoints."""
    endpoints = {
        "predict": "/predict (POST)",
        "health": "/health (GET)",
        "docs": "/docs (GET)"
    }
    info = {
        "message": "CI/CD Log Anomaly Detection API",
        "version": "1.0.0",
        "endpoints": endpoints
    }
    return info
147
+
148
if __name__ == "__main__":
    # Running this file directly serves the app with uvicorn on all
    # interfaces (0.0.0.0), port 8000.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
calculator.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def add(x, y):
    """Return the sum of x and y."""
    total = x + y
    return total
3
+
4
def subtract(x, y):
    """Return x minus y."""
    difference = x - y
    return difference
6
+
7
def divide(x, y):
    """Return x divided by y using true division.

    A zero divisor is deliberately left unhandled, so it raises
    ZeroDivisionError.
    """
    # Intentionally buggy - no zero handling
    quotient = x / y
    return quotient
10
+
11
def multiply(x, y):
    """Return the product of x and y."""
    product = x * y
    return product
13
+
14
if __name__ == "__main__":
    # Running the file as a script only prints a confirmation message.
    print("Calculator module loaded")
collect_logs.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import json
4
+ import zipfile
5
+ import io
6
+ from datetime import datetime
7
+
8
class GitHubLogCollector:
    """Download GitHub Actions run logs and store them as labelled files.

    Logs of successful runs are written to ``data/normal``; logs of all other
    finished runs are written to ``data/anomalous``.
    """

    # Seconds to wait for the GitHub API before giving up on a request.
    REQUEST_TIMEOUT = 60

    def __init__(self, repo_owner, repo_name, token):
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.token = token
        self.headers = {
            'Authorization': f'token {token}',
            'Accept': 'application/vnd.github.v3+json'
        }

    @staticmethod
    def _read_archive(payload):
        """Return the text of every member of a ZIP archive given as bytes.

        Each member is followed by a newline. Undecodable bytes are replaced
        rather than raising, so one odd byte cannot abort a collection.
        """
        with zipfile.ZipFile(io.BytesIO(payload)) as zip_file:
            parts = [
                zip_file.read(file_name).decode('utf-8', errors='replace') + "\n"
                for file_name in zip_file.namelist()
            ]
        return "".join(parts)

    def get_workflow_runs(self, workflow_id='main.yml'):
        """Return the decoded "list workflow runs" response for a workflow.

        Raises:
            requests.HTTPError: when GitHub answers with an error status
                (e.g. bad token or unknown repository).
        """
        url = f'https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/actions/workflows/{workflow_id}/runs'
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        # Fail here with the real HTTP error instead of a later KeyError on
        # the error body.
        response.raise_for_status()
        return response.json()

    def download_log(self, run_id):
        """Return the concatenated log text of a run, or None if unavailable."""
        url = f'https://api.github.com/repos/{self.repo_owner}/{self.repo_name}/actions/runs/{run_id}/logs'
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)

        if response.status_code != 200:
            return None

        # The endpoint delivers a ZIP archive of per-step text files.
        return self._read_archive(response.content)

    def collect_all_logs(self):
        """Download the logs of the listed runs into the data directories."""
        os.makedirs('data/normal', exist_ok=True)
        os.makedirs('data/anomalous', exist_ok=True)

        runs = self.get_workflow_runs()

        for run in runs.get('workflow_runs', []):
            run_id = run['id']
            conclusion = run['conclusion']
            created_at = run['created_at']

            # A run without a conclusion has not finished; it must not be
            # labelled as anomalous.
            if conclusion is None:
                continue

            log_content = self.download_log(run_id)
            if not log_content:
                continue

            # Clean filename
            timestamp = datetime.fromisoformat(created_at.replace('Z', '+00:00')).strftime('%Y%m%d_%H%M%S')

            if conclusion == 'success':
                filename = f'data/normal/run_{run_id}_{timestamp}.log'
            else:
                filename = f'data/anomalous/run_{run_id}_{timestamp}.log'

            # Explicit encoding: the training loader reads these as UTF-8.
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(log_content)

            print(f"Saved: {filename} (Status: {conclusion})")
62
+
63
# Usage
if __name__ == "__main__":
    # Values are read from the environment so that no token has to be written
    # into the source; the placeholders are only fallbacks to replace.
    REPO_OWNER = os.environ.get("REPO_OWNER", "your-username")
    REPO_NAME = os.environ.get("REPO_NAME", "your-repo-name")
    # Create a token at https://github.com/settings/tokens
    GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "your-github-token")

    collector = GitHubLogCollector(REPO_OWNER, REPO_NAME, GITHUB_TOKEN)
    collector.collect_all_logs()
preprocess_logs.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import pandas as pd
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.ensemble import RandomForestClassifier
7
+ from sklearn.metrics import classification_report, confusion_matrix
8
+ import joblib
9
+ import numpy as np
10
+
11
class LogPreprocessor:
    """Clean raw CI logs and load them from disk as labelled training data."""

    def __init__(self):
        self.vectorizer = TfidfVectorizer(
            max_features=1000,
            stop_words='english',
            ngram_range=(1, 2),
            min_df=2
        )

    def clean_log(self, log_content):
        """Return the log with run-specific details masked.

        Masks timestamps, run/job IDs, long numbers and home/temp paths, then
        collapses whitespace and strips the ends.
        """
        # Remove timestamps
        log_content = re.sub(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z', '[TIMESTAMP]', log_content)

        # Remove specific IDs and numbers
        log_content = re.sub(r'run_\d+', 'run_ID', log_content)
        log_content = re.sub(r'job_\d+', 'job_ID', log_content)
        log_content = re.sub(r'\b\d{8,}\b', '[ID]', log_content)

        # Remove paths that might be environment-specific
        log_content = re.sub(r'/home/[^/\s]+', '/home/USER', log_content)
        log_content = re.sub(r'/tmp/[^/\s]+', '/tmp/TEMP', log_content)

        # Normalize whitespace
        log_content = re.sub(r'\s+', ' ', log_content)

        return log_content.strip()

    @staticmethod
    def _read_log_dir(directory):
        """Yield the text of each regular file in *directory*, in name order.

        Yields nothing when the directory does not exist. Sub-directories are
        skipped, and names are sorted so the sample order (and therefore the
        seeded train/test split) is the same on every machine.
        """
        if not os.path.isdir(directory):
            return
        for filename in sorted(os.listdir(directory)):
            filepath = os.path.join(directory, filename)
            if not os.path.isfile(filepath):
                continue
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                yield f.read()

    def load_logs(self, data_dir):
        """Load and clean all logs below *data_dir*.

        Returns:
            ``(logs, labels)``: cleaned texts and their labels, 0 for files
            in ``normal`` and 1 for files in ``anomalous``. Normal logs come
            first.
        """
        logs = []
        labels = []

        # 0 for normal, 1 for anomalous
        for subdir, label in (('normal', 0), ('anomalous', 1)):
            for content in self._read_log_dir(os.path.join(data_dir, subdir)):
                logs.append(self.clean_log(content))
                labels.append(label)

        return logs, labels
63
+
64
class AnomalyDetectionModel:
    """Random-forest classifier trained on vectorized, cleaned CI logs."""

    def __init__(self):
        self.preprocessor = LogPreprocessor()
        self.model = RandomForestClassifier(
            n_estimators=100,
            random_state=42,
            class_weight='balanced'
        )
        self.is_trained = False

    def train(self, data_dir='data'):
        """Train on the logs below *data_dir*, print metrics and save the model.

        Writes 'anomaly_model.pkl' and 'vectorizer.pkl' to the working
        directory.

        Raises:
            ValueError: when no logs are found or when only one of the two
                classes is present.
        """
        print("Loading logs...")
        logs, labels = self.preprocessor.load_logs(data_dir)

        if len(logs) == 0:
            raise ValueError("No logs found. Please run data collection first.")

        normal_count = labels.count(0)
        anomalous_count = labels.count(1)
        print(f"Loaded {len(logs)} logs ({normal_count} normal, {anomalous_count} anomalous)")

        # Fail early and clearly: with a single class the run would only stop
        # later, inside the evaluation report, with an unrelated-looking error.
        if normal_count == 0 or anomalous_count == 0:
            raise ValueError(
                "Need both normal and anomalous logs to train "
                f"(found {normal_count} normal, {anomalous_count} anomalous)."
            )

        # Vectorize logs
        print("Vectorizing logs...")
        X = self.preprocessor.vectorizer.fit_transform(logs)
        y = np.array(labels)

        # Split data; stratify keeps the class ratio in both parts
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        # Train model
        print("Training model...")
        self.model.fit(X_train, y_train)

        # Evaluate
        y_pred = self.model.predict(X_test)
        print("\nModel Performance:")
        print(classification_report(y_test, y_pred, target_names=['Normal', 'Anomalous']))
        print("\nConfusion Matrix:")
        print(confusion_matrix(y_test, y_pred))

        self.is_trained = True

        # Save model and vectorizer
        joblib.dump(self.model, 'anomaly_model.pkl')
        joblib.dump(self.preprocessor.vectorizer, 'vectorizer.pkl')
        print("\nModel saved as 'anomaly_model.pkl' and 'vectorizer.pkl'")

    def predict(self, log_content):
        """Classify one raw log.

        If the instance has not been trained, the saved model and vectorizer
        are loaded from the working directory first.

        Returns:
            A dict with 'is_anomaly', 'confidence' and 'anomaly_probability'.

        Raises:
            ValueError: when no trained model files can be found.
        """
        if not self.is_trained:
            # Load saved model.
            # NOTE: joblib files are pickles; only load files you produced.
            try:
                self.model = joblib.load('anomaly_model.pkl')
                self.preprocessor.vectorizer = joblib.load('vectorizer.pkl')
                self.is_trained = True
            except FileNotFoundError:
                raise ValueError("No trained model found. Please train first.")

        # Preprocess and predict
        cleaned_log = self.preprocessor.clean_log(log_content)
        log_vector = self.preprocessor.vectorizer.transform([cleaned_log])

        prediction = self.model.predict(log_vector)[0]
        probability = self.model.predict_proba(log_vector)[0]

        return {
            'is_anomaly': bool(prediction),
            'confidence': float(max(probability)),
            'anomaly_probability': float(probability[1]) if len(probability) > 1 else 0.0
        }
133
+
134
if __name__ == "__main__":
    # Running the file as a script trains on the default 'data' directory;
    # train() also writes the model and vectorizer files.
    model = AnomalyDetectionModel()
    model.train()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
scikit-learn==1.3.0
pandas==2.0.3
numpy==1.24.3
requests==2.31.0
python-dotenv==1.0.0
fastapi==0.100.1
uvicorn==0.23.2
gradio==3.44.0
PyGithub==1.59.0
tqdm==4.65.0
# Imported at the top of api_service.py; without it the service cannot start.
shap==0.42.1
test_calculator.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from calculator import add, subtract, divide, multiply
3
+
4
def test_add():
    """add() returns the sum for a positive pair and for operands that cancel."""
    cases = [((2, 3), 5), ((-1, 1), 0)]
    for operands, expected in cases:
        assert add(*operands) == expected
7
+
8
def test_subtract():
    """subtract() returns positive and negative differences."""
    cases = [((5, 3), 2), ((0, 5), -5)]
    for operands, expected in cases:
        assert subtract(*operands) == expected
11
+
12
def test_multiply():
    """multiply() returns the product, including multiplication by zero."""
    cases = [((3, 4), 12), ((0, 10), 0)]
    for operands, expected in cases:
        assert multiply(*operands) == expected
15
+
16
def test_divide():
    """divide() returns the quotient for exact divisions."""
    cases = [((10, 2), 5), ((9, 3), 3)]
    for operands, expected in cases:
        assert divide(*operands) == expected
19
+
20
# Flaky test that fails randomly
def test_flaky():
    """Fail at random on about 30% of runs."""
    roll = random.random()
    # 30% chance to fail
    assert not roll < 0.3, "Flaky test failed randomly"
25
+
26
+ # Always failing test (comment out initially)
27
+ # def test_always_fail():
28
+ # assert 1 == 2, "This test always fails"
test_system.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import subprocess
4
+ import time
5
+ import requests
6
+ import json
7
+
8
def run_command(cmd):
    """Run *cmd* through the shell and report how it went.

    Returns a ``(succeeded, stdout, stderr)`` tuple, where ``succeeded`` is
    True for exit status 0. If the command cannot be run at all, returns
    ``(False, "", <error text>)``.
    """
    try:
        completed = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    except Exception as e:
        return False, "", str(e)

    succeeded = completed.returncode == 0
    return succeeded, completed.stdout, completed.stderr
15
+
16
def test_api_locally():
    """Test the FastAPI service locally.

    Starts ``api_service.py`` as a child process, waits until ``/health``
    answers, sends one sample log to ``/predict``, prints the outcome and
    always stops the child process again.
    """
    import sys  # local import: only needed for the interpreter path

    print("Testing API locally...")

    # Start the API in background
    print("Starting API server...")
    api_process = subprocess.Popen(
        # Use the interpreter running this script, not whatever "python" is.
        [sys.executable, "api_service.py"],
        # The output is never read here; discarding it avoids a child that
        # blocks on a full pipe.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL
    )

    try:
        # Wait for the server to start: poll instead of a fixed sleep, since
        # importing the ML libraries can take longer than a few seconds.
        response = None
        deadline = time.monotonic() + 30
        while time.monotonic() < deadline:
            if api_process.poll() is not None:
                print("API test failed: server process exited early")
                return
            try:
                response = requests.get("http://localhost:8000/health", timeout=2)
                break
            except (requests.ConnectionError, requests.Timeout):
                time.sleep(0.5)

        if response is None:
            print("API test failed: server did not become ready")
            return

        # Test health endpoint
        print(f"Health check: {response.status_code}")

        # Test prediction with sample log
        sample_log = """
    2023-10-27T12:00:00.000Z [INFO] Starting CI pipeline
    2023-10-27T12:00:01.000Z [INFO] Checkout successful
    2023-10-27T12:00:02.000Z [ERROR] pytest failed
    2023-10-27T12:00:03.000Z [ERROR] Test test_divide failed: ZeroDivisionError
    """

        response = requests.post(
            "http://localhost:8000/predict",
            json={
                "log_content": sample_log,
                "include_explanation": True
            },
            timeout=30
        )

        if response.status_code == 200:
            result = response.json()
            print("Prediction test successful:")
            print(json.dumps(result, indent=2))
        else:
            print(f"Prediction test failed: {response.status_code}")

    except requests.RequestException as e:
        print(f"API test failed: {e}")

    finally:
        # Stop the API server
        api_process.terminate()
        api_process.wait()
66
+
67
def main():
    """Run the three system checks: model files, direct prediction, API.

    Stops after the first check when the model files are missing.
    """
    print("🚀 Starting CI/CD Anomaly Detection System Test")
    print("=" * 50)

    # Test 1: the saved model and vectorizer must be loadable
    print("1. Checking for model files...")
    try:
        import joblib
        joblib.load('anomaly_model.pkl')
        joblib.load('vectorizer.pkl')
        print("✅ Model files found and loaded")
    except FileNotFoundError:
        print("❌ Model files not found. Run training first:")
        print(" python preprocess_logs.py")
        return

    # Test 2: predictions through the model class
    print("\n2. Testing model directly...")
    from preprocess_logs import AnomalyDetectionModel

    detector = AnomalyDetectionModel()

    samples = (
        ("Normal log prediction", "INFO: All tests passed successfully"),
        ("Anomalous log prediction", "ERROR: Test failed with ZeroDivisionError: division by zero"),
    )
    for label, text in samples:
        outcome = detector.predict(text)
        print(f"{label}: {outcome}")

    # Test 3: predictions through the HTTP API
    print("\n3. Testing API service...")
    test_api_locally()

    closing_lines = (
        "\n🎉 System test completed!",
        "\nNext steps:",
        "1. Deploy your API to a cloud service",
        "2. Set MODEL_API_URL secret in your GitHub repo",
        "3. Push some code changes to trigger the workflows",
    )
    for line in closing_lines:
        print(line)


if __name__ == "__main__":
    main()