# dsb2018_topcoders/albu/src/utils.py
import threading
2
import numpy as np
3
import cv2
4
# Run OpenCV functions sequentially (0 disables its internal thread pool) and
# turn off OpenCL acceleration.
# NOTE(review): presumably done so OpenCV does not compete with data-loader
# worker processes -- confirm against how this module is used.
cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)
6
from sklearn.model_selection import KFold
7
from config import Config
8
import argparse
9
import json
10
import pandas as pd
11
12
13
def get_folds(data, num):
    """Split ``data`` into ``num`` shuffled cross-validation folds.

    Args:
        data: Samples to split; passed straight to ``KFold.split``.
        num: Number of folds (``n_splits``).

    Returns:
        The generator produced by ``KFold.split(data)``, yielding one
        ``(train_indices, test_indices)`` pair per fold.
    """
    # A fixed random_state keeps the shuffled split reproducible across runs.
    kf = KFold(n_splits=num, shuffle=True, random_state=42)
    return kf.split(data)
17
18
19
def get_csv_folds(ds, fn, holdout=False):
    """Build oversampled train/validation folds from a fold-assignment CSV.

    For every distinct value in the ``fold`` column, the validation set is
    the rows of that fold, and the training set is every row from the other
    folds plus rows that are always trained on: the hard-coded "polosa" and
    "galaxy" image ids and all rows whose ``source`` is ``'wikimedia'``.
    Those always-included rows are therefore also present in the training
    set of the fold they are validated in.

    Training ids are repeated (oversampled) according to their ``cluster``:
    4x for ``b``/``d``/``e``/``m``, 3x for ``c``/``n``, 1x otherwise; the
    "polosa" image is always repeated 5x regardless of its cluster.

    Args:
        ds: Object providing ``get_indexes_by_names(names)``, used to turn
            lists of image ids into whatever index representation it returns.
        fn: CSV path (or anything ``pandas.read_csv`` accepts) with at least
            the columns ``img_id``, ``fold``, ``cluster`` and ``source``.
        holdout: Unused; kept so existing callers keep working.

    Returns:
        A list of ``(train_indexes, val_indexes)`` tuples, one per distinct
        fold value.
    """
    df = pd.read_csv(fn)
    # df = df[df['source'] != 'janowczyk']
    df = df[['img_id', 'fold', 'cluster', 'source']]

    polosa_id = '193ffaa5272d5c421ae02130a64d98ad120ec70e4ed97a72cdcd4801ce93b066'
    galaxy_ids = [
        '538b7673d507014d83af238876e03617396b70fe27f525f8205a4a96900fbb8e',
        'a102535b0e88374bea4a1cfd9ee7cb3822ff54f4ab2a9845d428ec22f9ee2288',
        'cb4df20a83b2f38b394c67f1d9d4aef29f9794d5345da3576318374ec3a11490',
        'f29fd9c52e04403cd2c7d43b6fe2479292e53b2f61969d25256d2d2aca7c6a81',
    ]
    # Images that go into the training set of every fold.
    all_folds_ids = galaxy_ids + [polosa_id]
    # Oversampling factor per cluster label; unlisted clusters are used once.
    cluster_repeats = {'b': 4, 'd': 4, 'e': 4, 'm': 4, 'c': 3, 'n': 3}

    folds = []
    # NOTE(review): the order of the returned folds follows set iteration
    # order, not necessarily ascending fold id -- confirm callers that index
    # into the result do not rely on a sorted order.
    for it in set(df['fold']):
        toadd = (
            (df['fold'] != it)
            | df['img_id'].isin(all_folds_ids)
            | (df['source'] == 'wikimedia')
        )
        val = df[df['fold'] == it]['img_id'].values.tolist()

        # Filter once and walk ids and clusters in lockstep.
        selected = df[toadd]
        train = []
        for img_id, cluster in zip(selected['img_id'].values,
                                   selected['cluster'].values):
            if img_id == polosa_id:
                rep = 5  # overrides any cluster-based factor
            else:
                rep = cluster_repeats.get(cluster, 1)
            train.extend([img_id] * rep)

        folds.append((ds.get_indexes_by_names(train),
                      ds.get_indexes_by_names(val)))
    return folds
46
47
48
def get_config():
    """Build a ``Config`` from the command line and a JSON config file.

    Command-line arguments (read from ``sys.argv`` by ``argparse``):
        config_path: Path to a JSON file whose top-level object supplies the
            keyword arguments for ``Config``.
        --fold: Optional fold to process; defaults to ``None``.

    The loaded configuration is printed to stdout. Any ``fold`` key in the
    JSON file is overwritten by the ``--fold`` argument (including when it
    is omitted, in which case ``fold`` becomes ``None``).

    Returns:
        ``Config(**config)`` built from the JSON contents plus ``fold``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('config_path')
    parser.add_argument('--fold', dest='fold', default=None,
                        help='fold to process')
    args = parser.parse_args()

    with open(args.config_path, 'r') as f:
        config = json.load(f)
    print(config)

    # No ``type=`` is given for --fold, so the value is a string (or None).
    config['fold'] = args.fold
    return Config(**config)
59
60
def update_config(config, **kwargs):
    """Return a new ``Config`` with selected fields overridden.

    Args:
        config: Existing configuration; must provide ``_asdict()``
            (presumably a namedtuple-based ``Config`` -- verify against
            the ``config`` module).
        **kwargs: Field names and their replacement values.

    Returns:
        A fresh ``Config`` built from the merged field dictionary. The
        merged dictionary is also printed to stdout.
    """
    d = config._asdict()
    d.update(kwargs)
    print(d)
    return Config(**d)