AMP-Classifier2

Sleeping

App Files Files Community

nonzeroexit committed on May 21

Commit

03f381c

verified ·

1 Parent(s): caf966d

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -10

app.py CHANGED Viewed

@@ -33,7 +33,7 @@ def get_amp_model():
     global _amp_model, _amp_scaler
     if _amp_model is None:
         from tensorflow.keras.models import load_model
-        _amp_model = load_model("Comb1_aac_ctd_RFE_selected_features_model.keras")
         _amp_scaler = joblib.load("norm (4).joblib")
     return _amp_model, _amp_scaler
@@ -149,7 +149,7 @@ def keras_predict_proba(X):
 def extract_features(sequence):
-    """Compute CTD + AAC, select the 343 training columns IN ORDER, then scale."""
     sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
     if len(sequence) < 10:
         return "Error: Sequence too short."
@@ -161,26 +161,34 @@ def extract_features(sequence):
         ctd_features = CTD.CalculateCTD(sequence)
         aac = AAComposition.CalculateAADipeptideComposition(sequence)
-        # Merge everything into one lookup dict
         pool = {}
         pool.update(ctd_features)
         pool.update(aac)
-        # Verify all needed features are present
         missing = [f for f in selected_features if f not in pool]
         if missing:
             return f"Error: Missing features from propy: {missing[:5]}..."
-        # Build the 343-wide row IN THE EXACT TRAINING ORDER, THEN scale.
-        ordered_values = [pool[f] for f in selected_features]
-        feature_row = np.array(ordered_values, dtype=np.float64).reshape(1, -1)
-        scaled = amp_scaler.transform(feature_row)  # scaler expects exactly 343 cols
-        return scaled.astype(np.float32)
     except Exception as e:
         return f"Error in feature extraction: {str(e)}"
 def predictmic(sequence):
     """Run MIC prediction in a SEPARATE process (mic_worker.py).

     global _amp_model, _amp_scaler
     if _amp_model is None:
         from tensorflow.keras.models import load_model
+        _amp_model = load_model("Comb1_aac_ctd_RFE_selected_features_model(1).keras")
         _amp_scaler = joblib.load("norm (4).joblib")
     return _amp_model, _amp_scaler
 def extract_features(sequence):
+    """Compute CTD + AAC, scale the FULL feature pool, then select the 343 training columns IN ORDER."""
     sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
     if len(sequence) < 10:
         return "Error: Sequence too short."
         ctd_features = CTD.CalculateCTD(sequence)
         aac = AAComposition.CalculateAADipeptideComposition(sequence)
         pool = {}
         pool.update(ctd_features)
         pool.update(aac)
+        # Verify all needed features are present BEFORE doing anything
         missing = [f for f in selected_features if f not in pool]
         if missing:
             return f"Error: Missing features from propy: {missing[:5]}..."
+        # 1. Build a row from ALL pool keys, in a consistent order
+        all_feature_names = sorted(pool.keys())
+        all_values = np.array([pool[f] for f in all_feature_names], dtype=np.float64).reshape(1, -1)
+        # 2. Scale the FULL feature row
+        #    NOTE: amp_scaler must have been fit on this same full pool.
+        #    If your scaler was fit on only 343 cols, re-fit it on the full pool first.
+        scaled_all = amp_scaler.transform(all_values)
+        # 3. Select the 343 features IN TRAINING ORDER from the scaled array
+        name_to_idx = {name: i for i, name in enumerate(all_feature_names)}
+        selected_indices = [name_to_idx[f] for f in selected_features]
+        scaled_selected = scaled_all[:, selected_indices]
+        return scaled_selected.astype(np.float32)
     except Exception as e:
         return f"Error in feature extraction: {str(e)}"
 def predictmic(sequence):
     """Run MIC prediction in a SEPARATE process (mic_worker.py).