|
a |
|
b/analysis/ml/ScaledLogisticRegression.py |
|
|
1 |
import multiprocessing

if __name__ == '__main__':
    # The start method is selected before any of the imports below run;
    # this ordering is kept exactly as in the original script.
    multiprocessing.set_start_method('forkserver')

    import sys
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    import pdb
    from evaluate_model import evaluate_model
    import numpy as np

    # Positional command-line arguments, in order:
    #   1: dataset, 2: save_file, 3: random_seed (parsed as int), 4: rare
    dataset, save_file = sys.argv[1], sys.argv[2]
    random_seed = int(sys.argv[3])
    # NOTE(review): eval() executes whatever expression is given on the
    # command line. Presumably callers pass a literal such as True/False;
    # if that is confirmed, ast.literal_eval would be a safer parser.
    rare = eval(sys.argv[4])

    # Two-step pipeline: a 'scale' step (StandardScaler) followed by an
    # 'lr' step (LogisticRegression with the saga solver).
    scaler = StandardScaler()
    logistic = LogisticRegression(
        solver='saga',
        max_iter=1000,
        random_state=random_seed,
    )
    clf = Pipeline([('scale', scaler), ('lr', logistic)])

    # Hyper-parameter candidates handed to evaluate_model; the 'lr__'
    # prefix addresses parameters of the pipeline step named 'lr'.
    hyper_params = {}
    hyper_params['lr__C'] = np.logspace(-2, 1, 20)
    hyper_params['lr__penalty'] = ['l1', 'l2']

    # Hand everything to the shared evaluation routine.
    evaluate_model(
        dataset,
        save_file,
        random_seed,
        clf,
        'ScaleLR',
        hyper_params,
        False,
        rare=rare,
    )