|
a |
|
b/dsb2018_topcoders/albu/src/utils.py |
|
|
1 |
import threading |
|
|
2 |
import numpy as np |
|
|
3 |
import cv2 |
|
|
4 |
cv2.setNumThreads(0) |
|
|
5 |
cv2.ocl.setUseOpenCL(False) |
|
|
6 |
from sklearn.model_selection import KFold |
|
|
7 |
from config import Config |
|
|
8 |
import argparse |
|
|
9 |
import json |
|
|
10 |
import pandas as pd |
|
|
11 |
|
|
|
12 |
|
|
|
13 |
def get_folds(data, num):
    """Return shuffled K-fold splits of *data*.

    Args:
        data: Sequence or array whose length determines the sample indices
            that are split.
        num: Number of folds (passed to ``KFold`` as ``n_splits``).

    Returns:
        The generator produced by ``KFold.split(data)``. Shuffling uses the
        fixed seed 42, so the same input length and ``num`` give the same
        splits on every call.
    """
    # No get_n_splits() call is needed: it only reports the configured number
    # of splits and has no effect on what split() yields.
    return KFold(n_splits=num, shuffle=True, random_state=42).split(data)
|
|
17 |
|
|
|
18 |
|
|
|
19 |
def get_csv_folds(ds, fn, holdout=False):
    """Build oversampled train/validation index pairs from a folds CSV.

    The CSV must contain the columns ``img_id``, ``fold``, ``cluster`` and
    ``source``. One (train, validation) pair is produced per distinct value
    of ``fold``, in ascending fold order, so the position of a pair in the
    returned list does not depend on set/hash iteration order.

    For a given fold value:
      * validation ids are the ``img_id`` of every row in that fold;
      * training ids are the ``img_id`` of every row outside that fold, plus
        rows whose id is in the hard-coded "always train" list and rows whose
        ``source`` is ``'wikimedia'`` (these are used for training even when
        they belong to the validation fold);
      * training ids are repeated according to their ``cluster`` label
        (4x for b/d/e/m, 3x for c/n, 1x otherwise), and the single
        ``polosa_id`` image is always repeated 5x.

    Args:
        ds: Object exposing ``get_indexes_by_names(names)``; it is called once
            with the training id list and once with the validation id list.
        fn: Path or file-like object accepted by ``pandas.read_csv``.
        holdout: Currently unused; kept for interface compatibility.

    Returns:
        A list of ``(train, val)`` tuples holding whatever
        ``ds.get_indexes_by_names`` returns for each id list.
    """
    df = pd.read_csv(fn)
    df = df[['img_id', 'fold', 'cluster', 'source']]

    polosa_id = '193ffaa5272d5c421ae02130a64d98ad120ec70e4ed97a72cdcd4801ce93b066'
    galaxy_ids = [
        '538b7673d507014d83af238876e03617396b70fe27f525f8205a4a96900fbb8e',
        'a102535b0e88374bea4a1cfd9ee7cb3822ff54f4ab2a9845d428ec22f9ee2288',
        'cb4df20a83b2f38b394c67f1d9d4aef29f9794d5345da3576318374ec3a11490',
        'f29fd9c52e04403cd2c7d43b6fe2479292e53b2f61969d25256d2d2aca7c6a81',
    ]
    all_folds_ids = galaxy_ids + [polosa_id]

    # Repetition factor per cluster label; unlisted clusters are used once.
    cluster_reps = {'b': 4, 'd': 4, 'e': 4, 'm': 4, 'c': 3, 'n': 3}

    # Rows that go into the training set of every fold. This mask does not
    # depend on the fold being processed, so it is computed a single time.
    always_train = df['img_id'].isin(all_folds_ids) | (df['source'] == 'wikimedia')

    folds = []
    # sorted() pins the output order; bare set iteration follows hash order.
    for it in sorted(set(df['fold'])):
        val = df[df['fold'] == it]['img_id'].values.tolist()

        # Filter once and read both columns from the same selection.
        train_rows = df[(df['fold'] != it) | always_train]

        train = []
        for img_id, cluster in zip(train_rows['img_id'].values,
                                   train_rows['cluster'].values):
            # The polosa image overrides any cluster-based factor.
            rep = 5 if img_id == polosa_id else cluster_reps.get(cluster, 1)
            train.extend([img_id] * rep)

        folds.append((ds.get_indexes_by_names(train), ds.get_indexes_by_names(val)))
    return folds
|
|
46 |
|
|
|
47 |
|
|
|
48 |
def get_config():
    """Build a ``Config`` from the command line and a JSON file.

    Command-line arguments (read from ``sys.argv`` by argparse):
        config_path: Positional path to a JSON file holding the config fields.
        --fold: Optional fold to process; ``None`` when omitted. No ``type``
            is given to argparse, so a supplied value stays a string.

    The loaded JSON object is printed, its ``'fold'`` entry is set to the
    command-line value (replacing any value from the file), and the result is
    expanded into ``Config(**...)``.

    Returns:
        The constructed ``Config`` instance.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('config_path')
    arg_parser.add_argument('--fold', dest='fold', default=None, help='fold to process')
    cli = arg_parser.parse_args()

    with open(cli.config_path, 'r') as config_file:
        settings = json.load(config_file)
    print(settings)

    # The command-line fold always wins over whatever the file contained.
    settings['fold'] = cli.fold
    return Config(**settings)
|
|
59 |
|
|
|
60 |
def update_config(config, **kwargs):
    """Return a new ``Config`` with selected fields replaced.

    Args:
        config: Existing configuration object; must provide ``_asdict()``.
        **kwargs: Field values that override (or extend) those of *config*.

    Returns:
        A ``Config`` built from the merged field mapping. The merged mapping
        is printed before the object is constructed.
    """
    fields = config._asdict()
    # Apply the overrides one by one on top of the current field values.
    for key, value in kwargs.items():
        fields[key] = value
    print(fields)
    return Config(**fields)