surbi karki committed on
Commit
928f8a8
·
verified ·
1 Parent(s): 6c5196d

Upload 2 files

Browse files
Files changed (2) hide show
  1. Outlier_detection.ipynb +0 -0
  2. featureselection.ipynb +190 -0
Outlier_detection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
featureselection.ipynb ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import numpy as np\n",
11
+ "from sklearn.feature_selection import mutual_info_classif\n"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "# Load pcos_cleaned.csv; the relative path resolves against the working directory\n",
21
+ "data = pd.read_csv(\"pcos_cleaned.csv\")\n",
22
+ "\n",
23
+ "\n"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 3,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "y = data[\"PCOS (Y/N)\"]  # target label\n",
33
+ "X = data.drop(columns=[\"PCOS (Y/N)\", \"Unnamed: 0\"], errors=\"ignore\")  # also drop the stray saved-index column so it is not scored as a feature"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 4,
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "name": "stdout",
43
+ "output_type": "stream",
44
+ "text": [
45
+ " Feature Mutual Information\n",
46
+ "38 Follicle No. (R) 0.240107\n",
47
+ "37 Follicle No. (L) 0.198132\n",
48
+ "33 Fast food (Y/N) 0.095965\n",
49
+ "29 hair growth(Y/N) 0.094711\n",
50
+ "30 Skin darkening (Y/N) 0.094472\n",
51
+ "28 Weight gain(Y/N) 0.091420\n",
52
+ "10 Cycle length(days) 0.074662\n",
53
+ "23 AMH(ng/mL) 0.066603\n",
54
+ "18 FSH/LH 0.065068\n",
55
+ "24 PRL(ng/mL) 0.061647\n",
56
+ "9 Cycle(R/I) 0.052702\n",
57
+ "13 No. of abortions 0.028979\n",
58
+ "7 RR (breaths/min) 0.028374\n",
59
+ "20 Waist(inch) 0.026092\n",
60
+ "12 Pregnant(Y/N) 0.024060\n",
61
+ "32 Pimples(Y/N) 0.023784\n",
62
+ "39 Avg. F size (L) (mm) 0.022989\n",
63
+ "22 TSH (mIU/L) 0.022234\n",
64
+ "31 Hair loss(Y/N) 0.019978\n",
65
+ "40 Avg. F size (R) (mm) 0.019886\n",
66
+ "16 FSH(mIU/mL) 0.019688\n",
67
+ "1 Age (yrs) 0.019323\n",
68
+ "0 Unnamed: 0 0.017659\n",
69
+ "17 LH(mIU/mL) 0.017577\n",
70
+ "4 BMI 0.015053\n",
71
+ "25 Vit D3 (ng/mL) 0.014276\n",
72
+ "6 Pulse rate(bpm) 0.013627\n",
73
+ "34 Reg.Exercise(Y/N) 0.009540\n",
74
+ "36 BP _Diastolic (mmHg) 0.008657\n",
75
+ "14 I beta-HCG(mIU/mL) 0.007298\n",
76
+ "35 BP _Systolic (mmHg) 0.006151\n",
77
+ "41 Endometrium (mm) 0.004497\n",
78
+ "3 Height(Cm) 0.000000\n",
79
+ "5 Blood Group 0.000000\n",
80
+ "8 Hb(g/dl) 0.000000\n",
81
+ "2 Weight (Kg) 0.000000\n",
82
+ "11 Marraige Status (Yrs) 0.000000\n",
83
+ "15 II beta-HCG(mIU/mL) 0.000000\n",
84
+ "19 Hip(inch) 0.000000\n",
85
+ "21 Waist:Hip Ratio 0.000000\n",
86
+ "27 RBS(mg/dl) 0.000000\n",
87
+ "26 PRG(ng/mL) 0.000000\n"
88
+ ]
89
+ }
90
+ ],
91
+ "source": [
92
+ "# Score every feature by its estimated mutual information with the PCOS label\n",
93
+ "mi = mutual_info_classif(X, y, random_state=42)  # seeded: the estimator adds random noise to continuous features, so unseeded runs reshuffle the ranking\n",
94
+ "\n",
95
+ "# Pair each column name with its score\n",
96
+ "mi_df = pd.DataFrame({'Feature': X.columns, 'Mutual Information': mi})\n",
97
+ "\n",
98
+ "# Highest-scoring features first\n",
99
+ "mi_df = mi_df.sort_values(by='Mutual Information', ascending=False)\n",
100
+ "\n",
101
+ "print(mi_df)\n"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": 5,
107
+ "metadata": {},
108
+ "outputs": [
109
+ {
110
+ "name": "stdout",
111
+ "output_type": "stream",
112
+ "text": [
113
+ " PCOS (Y/N) Follicle No. (R) Follicle No. (L) Skin darkening (Y/N) \\\n",
114
+ "0 0 3 3 0 \n",
115
+ "1 0 5 3 0 \n",
116
+ "2 1 15 13 0 \n",
117
+ "3 0 2 2 0 \n",
118
+ "4 0 4 3 0 \n",
119
+ "\n",
120
+ " hair growth(Y/N) Weight gain(Y/N) Cycle length(days) AMH(ng/mL) \\\n",
121
+ "0 0 0 5 2.07 \n",
122
+ "1 0 0 5 1.53 \n",
123
+ "2 0 0 5 6.63 \n",
124
+ "3 0 0 5 1.22 \n",
125
+ "4 0 0 5 2.26 \n",
126
+ "\n",
127
+ " Fast food (Y/N) Cycle(R/I) FSH/LH PRL(ng/mL) Pimples(Y/N) Age (yrs) \\\n",
128
+ "0 1.0 0 2.160326 45.16 0 28 \n",
129
+ "1 0.0 0 6.174312 20.09 0 36 \n",
130
+ "2 1.0 0 6.295455 10.52 1 33 \n",
131
+ "3 0.0 0 3.415254 36.90 0 37 \n",
132
+ "4 0.0 0 4.422222 30.09 0 25 \n",
133
+ "\n",
134
+ " BMI \n",
135
+ "0 19.3 \n",
136
+ "1 24.9 \n",
137
+ "2 25.3 \n",
138
+ "3 29.7 \n",
139
+ "4 20.1 \n"
140
+ ]
141
+ }
142
+ ],
143
+ "source": [
144
+ "pcos_df = pd.read_csv('pcos_cleaned.csv').rename(columns=str.strip)  # trim stray whitespace from headers\n",
145
+ "\n",
146
+ "selected_features = [\n",
147
+ "    'PCOS (Y/N)', 'Follicle No. (R)', 'Follicle No. (L)', 'Skin darkening (Y/N)',\n",
148
+ "    'hair growth(Y/N)', 'Weight gain(Y/N)', 'Cycle length(days)', 'AMH(ng/mL)',\n",
149
+ "    'Fast food (Y/N)', 'Cycle(R/I)', 'FSH/LH', 'PRL(ng/mL)', 'Pimples(Y/N)',\n",
150
+ "    'Age (yrs)', 'BMI',\n",
151
+ "]\n",
152
+ "\n",
153
+ "# Keep the target plus the chosen predictors, in the listed order\n",
154
+ "new_dataset = pcos_df.loc[:, selected_features]\n",
155
+ "\n",
156
+ "# Persist the reduced table (row index omitted) and preview its first rows\n",
157
+ "new_dataset.to_csv('new_pcos_dataset.csv', index=False)\n",
158
+ "print(new_dataset.head())"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "metadata": {},
165
+ "outputs": [],
166
+ "source": []
167
+ }
168
+ ],
169
+ "metadata": {
170
+ "kernelspec": {
171
+ "display_name": "Python 3",
172
+ "language": "python",
173
+ "name": "python3"
174
+ },
175
+ "language_info": {
176
+ "codemirror_mode": {
177
+ "name": "ipython",
178
+ "version": 3
179
+ },
180
+ "file_extension": ".py",
181
+ "mimetype": "text/x-python",
182
+ "name": "python",
183
+ "nbconvert_exporter": "python",
184
+ "pygments_lexer": "ipython3",
185
+ "version": "3.10.7"
186
+ }
187
+ },
188
+ "nbformat": 4,
189
+ "nbformat_minor": 2
190
+ }