|
a |
|
b/tests/tuning/config.yml |
|
|
1 |
# My useful comment |
|
|
2 |
# 🤖 PIPELINE DEFINITION |
|
|
3 |
nlp: |
|
|
4 |
"@core": pipeline |
|
|
5 |
lang: eds |
|
|
6 |
|
|
|
7 |
components: |
|
|
8 |
normalizer: |
|
|
9 |
'@factory': eds.normalizer |
|
|
10 |
|
|
|
11 |
sentencizer: |
|
|
12 |
'@factory': eds.sentences |
|
|
13 |
|
|
|
14 |
ner: |
|
|
15 |
'@factory': eds.ner_crf |
|
|
16 |
mode: "joint" |
|
|
17 |
target_span_getter: "gold_spans" |
|
|
18 |
# Set spans both in ents and in separate `ent.label` groups |
|
|
19 |
span_setter: [ "ents", "*" ] |
|
|
20 |
infer_span_setter: true |
|
|
21 |
|
|
|
22 |
embedding: |
|
|
23 |
'@factory': eds.text_cnn |
|
|
24 |
kernel_sizes: [ 3 ] |
|
|
25 |
|
|
|
26 |
embedding: |
|
|
27 |
'@factory': eds.transformer |
|
|
28 |
model: hf-internal-testing/tiny-bert |
|
|
29 |
window: 128 |
|
|
30 |
stride: 96 |
|
|
31 |
new_tokens: [ [ "(?:\\n\\s*)*\\n", "⏎" ] ] |
|
|
32 |
|
|
|
33 |
qualifier: |
|
|
34 |
'@factory': eds.span_classifier |
|
|
35 |
attributes: { "_.negation": [ "sosy" ], "_.unit": [ "measure" ] } |
|
|
36 |
span_getter: ["ents", "gold_spans"] |
|
|
37 |
|
|
|
38 |
embedding: |
|
|
39 |
'@factory': eds.span_pooler |
|
|
40 |
|
|
|
41 |
embedding: # ${ nlp.components.ner.embedding } |
|
|
42 |
'@factory': eds.text_cnn |
|
|
43 |
kernel_sizes: [ 3 ] |
|
|
44 |
|
|
|
45 |
embedding: |
|
|
46 |
'@factory': eds.transformer |
|
|
47 |
model: hf-internal-testing/tiny-bert |
|
|
48 |
window: 128 |
|
|
49 |
stride: 96 |
|
|
50 |
|
|
|
51 |
# 📈 SCORERS |
|
|
52 |
scorer: |
|
|
53 |
speed: true |
|
|
54 |
qual: |
|
|
55 |
'@metrics': eds.span_attributes |
|
|
56 |
span_getter: ${nlp.components.qualifier.span_getter} |
|
|
57 |
qualifiers: ${nlp.components.qualifier.attributes} |
|
|
58 |
ner: |
|
|
59 |
'@metrics': eds.ner_exact |
|
|
60 |
span_getter: ${nlp.components.ner.target_span_getter} |
|
|
61 |
|
|
|
62 |
# 🎛️ OPTIMIZER |
|
|
63 |
optimizer: |
|
|
64 |
"@core": optimizer |
|
|
65 |
optim: AdamW |
|
|
66 |
module: ${ nlp } |
|
|
67 |
groups: |
|
|
68 |
"^transformer": false |
|
|
69 |
".*": |
|
|
70 |
lr: |
|
|
71 |
"@schedules": linear |
|
|
72 |
start_value: 1e-3 |
|
|
73 |
max_value: 2e-3 |
|
|
74 |
warmup_rate: 0.5 |
|
|
75 |
total_steps: ${ train.max_steps } |
|
|
76 |
|
|
|
77 |
# 📚 DATA |
|
|
78 |
train_data: |
|
|
79 |
- data: |
|
|
80 |
'@readers': standoff |
|
|
81 |
path: tests/training/dataset/ |
|
|
82 |
converter: |
|
|
83 |
- '@factory': eds.standoff_dict2doc |
|
|
84 |
span_setter : 'gold_spans' |
|
|
85 |
span_attributes : ['sosy', 'unit', 'negation'] |
|
|
86 |
bool_attributes : ['negation'] # default standoff to doc converter |
|
|
87 |
- '@factory': eds.sentences |
|
|
88 |
nlp: ${nlp} |
|
|
89 |
- '@factory': eds.split |
|
|
90 |
nlp: null |
|
|
91 |
max_length: 2000 |
|
|
92 |
regex: '\n\n+' |
|
|
93 |
shuffle: dataset |
|
|
94 |
batch_size: 8 docs |
|
|
95 |
pipe_names: [ "ner" ] |
|
|
96 |
- data: |
|
|
97 |
'@readers': standoff |
|
|
98 |
path: tests/training/dataset/ |
|
|
99 |
converter: |
|
|
100 |
- '@factory': eds.standoff_dict2doc |
|
|
101 |
span_setter : 'gold_spans' |
|
|
102 |
span_attributes : ['sosy', 'unit', 'negation'] |
|
|
103 |
bool_attributes : ['negation'] # default standoff to doc converter |
|
|
104 |
shuffle: dataset |
|
|
105 |
batch_size: 16 spans |
|
|
106 |
pipe_names: [ "qualifier" ] |
|
|
107 |
|
|
|
108 |
val_data: |
|
|
109 |
'@readers': standoff |
|
|
110 |
path: tests/training/dataset/ |
|
|
111 |
converter: |
|
|
112 |
- '@factory': eds.standoff_dict2doc |
|
|
113 |
span_setter : 'gold_spans' |
|
|
114 |
span_attributes : ['sosy', 'unit', 'negation'] |
|
|
115 |
bool_attributes : ['negation'] # default standoff to doc converter |
|
|
116 |
|
|
|
117 |
# 🚀 TRAIN SCRIPT OPTIONS |
|
|
118 |
train: |
|
|
119 |
nlp: ${ nlp } |
|
|
120 |
train_data: ${ train_data } |
|
|
121 |
val_data: ${ val_data } |
|
|
122 |
max_steps: 5 |
|
|
123 |
validation_interval: 2 |
|
|
124 |
max_grad_norm: 1.0 |
|
|
125 |
scorer: ${ scorer } |
|
|
126 |
num_workers: 0 |
|
|
127 |
optimizer: ${ optimizer } |
|
|
128 |
cpu: true |