Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,7 +33,7 @@ def get_amp_model():
|
|
| 33 |
global _amp_model, _amp_scaler
|
| 34 |
if _amp_model is None:
|
| 35 |
from tensorflow.keras.models import load_model
|
| 36 |
-
_amp_model = load_model("Comb1_aac_ctd_RFE_selected_features_model.keras")
|
| 37 |
_amp_scaler = joblib.load("norm (4).joblib")
|
| 38 |
return _amp_model, _amp_scaler
|
| 39 |
|
|
@@ -149,7 +149,7 @@ def keras_predict_proba(X):
|
|
| 149 |
|
| 150 |
|
| 151 |
def extract_features(sequence):
|
| 152 |
-
"""Compute CTD + AAC, select the 343 training columns IN ORDER
|
| 153 |
sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
|
| 154 |
if len(sequence) < 10:
|
| 155 |
return "Error: Sequence too short."
|
|
@@ -161,26 +161,34 @@ def extract_features(sequence):
|
|
| 161 |
ctd_features = CTD.CalculateCTD(sequence)
|
| 162 |
aac = AAComposition.CalculateAADipeptideComposition(sequence)
|
| 163 |
|
| 164 |
-
# Merge everything into one lookup dict
|
| 165 |
pool = {}
|
| 166 |
pool.update(ctd_features)
|
| 167 |
pool.update(aac)
|
| 168 |
|
| 169 |
-
# Verify all needed features are present
|
| 170 |
missing = [f for f in selected_features if f not in pool]
|
| 171 |
if missing:
|
| 172 |
return f"Error: Missing features from propy: {missing[:5]}..."
|
| 173 |
|
| 174 |
-
# Build
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
-
scaled = amp_scaler.transform(feature_row) # scaler expects exactly 343 cols
|
| 179 |
-
return scaled.astype(np.float32)
|
| 180 |
except Exception as e:
|
| 181 |
return f"Error in feature extraction: {str(e)}"
|
| 182 |
|
| 183 |
-
|
| 184 |
def predictmic(sequence):
|
| 185 |
"""Run MIC prediction in a SEPARATE process (mic_worker.py).
|
| 186 |
|
|
|
|
| 33 |
global _amp_model, _amp_scaler
|
| 34 |
if _amp_model is None:
|
| 35 |
from tensorflow.keras.models import load_model
|
| 36 |
+
_amp_model = load_model("Comb1_aac_ctd_RFE_selected_features_model(1).keras")
|
| 37 |
_amp_scaler = joblib.load("norm (4).joblib")
|
| 38 |
return _amp_model, _amp_scaler
|
| 39 |
|
|
|
|
| 149 |
|
| 150 |
|
| 151 |
def extract_features(sequence):
|
| 152 |
+
"""Compute CTD + AAC, scale the FULL feature pool, then select the 343 training columns IN ORDER."""
|
| 153 |
sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
|
| 154 |
if len(sequence) < 10:
|
| 155 |
return "Error: Sequence too short."
|
|
|
|
| 161 |
ctd_features = CTD.CalculateCTD(sequence)
|
| 162 |
aac = AAComposition.CalculateAADipeptideComposition(sequence)
|
| 163 |
|
|
|
|
| 164 |
pool = {}
|
| 165 |
pool.update(ctd_features)
|
| 166 |
pool.update(aac)
|
| 167 |
|
| 168 |
+
# Verify all needed features are present BEFORE doing anything
|
| 169 |
missing = [f for f in selected_features if f not in pool]
|
| 170 |
if missing:
|
| 171 |
return f"Error: Missing features from propy: {missing[:5]}..."
|
| 172 |
|
| 173 |
+
# 1. Build a row from ALL pool keys, in a consistent order
|
| 174 |
+
all_feature_names = sorted(pool.keys())
|
| 175 |
+
all_values = np.array([pool[f] for f in all_feature_names], dtype=np.float64).reshape(1, -1)
|
| 176 |
+
|
| 177 |
+
# 2. Scale the FULL feature row
|
| 178 |
+
# NOTE: amp_scaler must have been fit on this same full pool, with its columns in the same sorted-by-name order as all_feature_names.
|
| 179 |
+
# If your scaler was fit on only the 343 selected columns (what the previous version of this function passed to it), re-fit it on the full pool first.
|
| 180 |
+
scaled_all = amp_scaler.transform(all_values)
|
| 181 |
+
|
| 182 |
+
# 3. Select the 343 features IN TRAINING ORDER from the scaled array
|
| 183 |
+
name_to_idx = {name: i for i, name in enumerate(all_feature_names)}
|
| 184 |
+
selected_indices = [name_to_idx[f] for f in selected_features]
|
| 185 |
+
scaled_selected = scaled_all[:, selected_indices]
|
| 186 |
+
|
| 187 |
+
return scaled_selected.astype(np.float32)
|
| 188 |
|
|
|
|
|
|
|
| 189 |
except Exception as e:
|
| 190 |
return f"Error in feature extraction: {str(e)}"
|
| 191 |
|
|
|
|
| 192 |
def predictmic(sequence):
|
| 193 |
"""Run MIC prediction in a SEPARATE process (mic_worker.py).
|
| 194 |
|