# tests/tuning/config.yml

# My useful comment
# 🤖 PIPELINE DEFINITION
nlp:
  "@core": pipeline

  lang: eds

  components:
    normalizer:
      '@factory': eds.normalizer

    sentencizer:
      '@factory': eds.sentences

    ner:
      '@factory': eds.ner_crf
      mode: "joint"
      target_span_getter: "gold_spans"
      # Set spans both in doc.ents and in separate `ent.label` span groups
      span_setter: [ "ents", "*" ]
      infer_span_setter: true

      embedding:
        '@factory': eds.text_cnn
        kernel_sizes: [ 3 ]

        embedding:
          '@factory': eds.transformer
          model: hf-internal-testing/tiny-bert
          window: 128
          stride: 96
          new_tokens: [ [ "(?:\\n\\s*)*\\n", "⏎" ] ]
    qualifier:
      '@factory': eds.span_classifier
      attributes: { "_.negation": [ "sosy" ], "_.unit": [ "measure" ] }
      span_getter: ["ents", "gold_spans"]

      embedding:
        '@factory': eds.span_pooler

        embedding:  # ${ nlp.components.ner.embedding }
          '@factory': eds.text_cnn
          kernel_sizes: [ 3 ]

          embedding:
            '@factory': eds.transformer
            model: hf-internal-testing/tiny-bert
            window: 128
            stride: 96
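
# Note: the qualifier stacks its own span-pooler + text-CNN + transformer, the
# same encoder configuration as the NER component; the commented
# `${ nlp.components.ner.embedding }` hint above suggests the inner embedding
# could presumably be interpolated instead so both components share one encoder.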

# 📈 SCORERS
scorer:
  speed: true
  qual:
    '@metrics': eds.span_attributes
    span_getter: ${nlp.components.qualifier.span_getter}
    qualifiers: ${nlp.components.qualifier.attributes}
  ner:
    '@metrics': eds.ner_exact
    span_getter: ${nlp.components.ner.target_span_getter}
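
# The `${nlp.components...}` interpolations keep the metrics aligned with the
# components' own span getters and attributes, so editing the pipeline section
# should automatically update what gets scored.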

# 🎛️ OPTIMIZER
optimizer:
  "@core": optimizer
  optim: AdamW
  module: ${ nlp }
  groups:
    "^transformer": false
    ".*":
      lr:
        "@schedules": linear
        start_value: 1e-3
        max_value: 2e-3
        warmup_rate: 0.5
        total_steps: ${ train.max_steps }
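
# The group keys are regular expressions matched against parameter names:
# "^transformer": false presumably excludes the transformer weights from
# optimization (freezing them), while the catch-all ".*" group follows a linear
# schedule expected to warm the learning rate from 1e-3 to 2e-3 over the first
# half of training (warmup_rate: 0.5) and then decay, with total_steps tied to
# train.max_steps.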

# 📚 DATA
train_data:
  - data:
      '@readers': standoff
      path: tests/training/dataset/
      converter:
        - '@factory': eds.standoff_dict2doc
          span_setter: 'gold_spans'
          span_attributes: ['sosy', 'unit', 'negation']
          bool_attributes: ['negation']  # default standoff to doc converter
        - '@factory': eds.sentences
          nlp: ${nlp}
        - '@factory': eds.split
          nlp: null
          max_length: 2000
          regex: '\n\n+'
    shuffle: dataset
    batch_size: 8 docs
    pipe_names: [ "ner" ]

  - data:
      '@readers': standoff
      path: tests/training/dataset/
      converter:
        - '@factory': eds.standoff_dict2doc
          span_setter: 'gold_spans'
          span_attributes: ['sosy', 'unit', 'negation']
          bool_attributes: ['negation']  # default standoff to doc converter
    shuffle: dataset
    batch_size: 16 spans
    pipe_names: [ "qualifier" ]

val_data:
  '@readers': standoff
  path: tests/training/dataset/
  converter:
    - '@factory': eds.standoff_dict2doc
      span_setter: 'gold_spans'
      span_attributes: ['sosy', 'unit', 'negation']
      bool_attributes: ['negation']  # default standoff to doc converter
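
# Two training streams read the same BRAT/standoff dataset into the gold_spans
# group: one feeds the NER component in document-sized batches ("8 docs", with
# long documents split on blank lines), the other feeds the qualifier in
# span-sized batches ("16 spans"); pipe_names presumably restricts which
# components are trained on each stream.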

# 🚀 TRAIN SCRIPT OPTIONS
train:
  nlp: ${ nlp }
  train_data: ${ train_data }
  val_data: ${ val_data }
  max_steps: 5
  validation_interval: 2
  max_grad_norm: 1.0
  scorer: ${ scorer }
  num_workers: 0
  optimizer: ${ optimizer }
  cpu: true
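
# The small values above (5 steps, validation every 2 steps, CPU only, no
# workers) keep the run cheap for CI; the tuning tests presumably load this
# file through edsnlp's confit-based configuration system and pass the
# resolved top-level sections to the train entry point.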