|
a |
|
b/medicalbert/datareader/FeatureSetBuilder.py |
|
|
1 |
# This class allows us to create a set of input features |
|
|
2 |
# where each new input feature is added as another dimension |
|
|
3 |
# For example |
|
|
4 |
# We could add a value for the first element related to age |
|
|
5 |
# and in the second element its health condition |
|
|
6 |
# But for now, this allows us to break long pieces of text |
|
|
7 |
# into shorter chunks |
|
|
8 |
|
|
|
9 |
class FeatureSetBuilder:
    """Accumulate an ordered list of input features that share one label.

    Features are stored in insertion order in ``self.features``; the label
    passed to the constructor is kept unchanged in ``self.label``.
    """

    def __init__(self, label):
        """Create an empty feature set associated with ``label``."""
        self.features = []
        self.label = label

    def add(self, input_feature):
        """Append ``input_feature`` to the end of the feature list."""
        self.features.append(input_feature)

    def resize(self, num_sections, func):
        """Pad or truncate the feature list to exactly ``num_sections`` items.

        Args:
            num_sections: Target number of features. Zero (or a negative
                value) leaves the feature list empty.
            func: Filler appended to the end of the list when it is too
                short. It is appended as-is (it is NOT called), so every
                padded slot refers to the same object.
        """
        shortfall = num_sections - len(self.features)

        if shortfall > 0:
            # Too few sections: pad at the tail with the filler value.
            self.features.extend([func] * shortfall)
        elif shortfall < 0:
            # Too many sections: cut at the head, keeping the most recent
            # ones. Slicing from the computed excess (instead of using
            # ``[-num_sections:]``) stays correct when num_sections is 0,
            # where ``[-0:]`` would keep the whole list.
            excess = -shortfall
            self.features = self.features[excess:]

    def get(self):
        """Return the list of all features (the internal list, not a copy)."""
        return self.features

    def get_feature(self, feature_index):
        """Return the feature stored at position ``feature_index``."""
        return self.features[feature_index]

    def get_label(self):
        """Return the label this feature set was created with."""
        return self.label