edsnlp / Git / Diff of /tests/tuning/config.yml

Models:
philipB/
edsnlp
Downloads: 1
Diff of /tests/tuning/config.yml [000000] .. [cad161]
Switch to side-by-side view

--- a
+++ b/tests/tuning/config.yml
@@ -0,0 +1,128 @@
+# My usefull comment
+# 🤖 PIPELINE DEFINITION
+nlp:  # pipeline definition; referenced elsewhere in this file as ${ nlp }
+  "@core": pipeline
+  lang: eds
+
+  components:  # four components: normalizer, sentencizer, ner, qualifier
+    normalizer:
+      '@factory': eds.normalizer
+
+    sentencizer:
+      '@factory': eds.sentences
+
+    ner:
+      '@factory': eds.ner_crf
+      mode: "joint"
+      target_span_getter: "gold_spans"  # also read by scorer.ner.span_getter via interpolation
+      # Write spans both to `ents` and to separate `ent.label` groups
+      span_setter: [ "ents", "*" ]
+      infer_span_setter: true
+
+      embedding:
+        '@factory': eds.text_cnn
+        kernel_sizes: [ 3 ]
+
+        embedding:  # nested embedding configured inside the text_cnn
+          '@factory': eds.transformer
+          model: hf-internal-testing/tiny-bert
+          window: 128
+          stride: 96  # stride (96) < window (128); presumably overlapping windows - confirm
+          new_tokens: [ [ "(?:\\n\\s*)*\\n", "⏎" ] ]  # one [pattern, token] pair: newline runs -> "⏎"
+
+    qualifier:
+      '@factory': eds.span_classifier
+      attributes: { "_.negation": [ "sosy" ], "_.unit": [ "measure" ] }  # also read by scorer.qual.qualifiers; presumably attribute -> span labels, confirm
+      span_getter: ["ents", "gold_spans"]  # also read by scorer.qual.span_getter
+
+      embedding:
+        '@factory': eds.span_pooler
+
+        embedding: # spelled out instead of ${ nlp.components.ner.embedding }; NOTE(review): this copy has no new_tokens
+          '@factory': eds.text_cnn
+          kernel_sizes: [ 3 ]
+
+          embedding:
+            '@factory': eds.transformer
+            model: hf-internal-testing/tiny-bert
+            window: 128
+            stride: 96
+
+# 📈 SCORERS
+scorer:  # referenced by train.scorer as ${ scorer }
+  speed: true  # NOTE(review): presumably turns on speed reporting - confirm
+  qual:  # span-attribute metric, configured from the qualifier component
+    '@metrics': eds.span_attributes
+    span_getter: ${nlp.components.qualifier.span_getter}  # same span_getter as the qualifier component
+    qualifiers: ${nlp.components.qualifier.attributes}  # same attributes mapping as the qualifier component
+  ner:  # exact-match NER metric, configured from the ner component
+    '@metrics': eds.ner_exact
+    span_getter: ${nlp.components.ner.target_span_getter}  # same target group as the ner component
+
+# 🎛️ OPTIMIZER
+optimizer:  # AdamW over the whole pipeline; referenced by train.optimizer as ${ optimizer }
+  "@core": optimizer
+  optim: AdamW
+  module: ${ nlp }  # the pipeline defined in the nlp section
+  groups:  # keys are patterns; NOTE(review): presumably matched against parameter names - confirm
+    "^transformer": false  # NOTE(review): presumably excludes/freezes matching parameters - confirm
+    ".*":  # catch-all pattern
+      lr:
+        "@schedules": linear
+        start_value: 1.0e-3  # written with a dot so YAML 1.1 loaders read a float, not a string
+        max_value: 2.0e-3  # same: "2e-3" would load as a string under YAML 1.1
+        warmup_rate: 0.5
+  total_steps: ${ train.max_steps }  # kept in sync with the train section
+
+# 📚 DATA
+train_data:  # two entries, each restricted to one pipe through pipe_names
+  - data:  # entry 1: documents for the "ner" pipe
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:  # applied as a list: dict2doc, then sentences, then split
+        - '@factory': eds.standoff_dict2doc  # default standoff to doc converter
+          span_setter: 'gold_spans'
+          span_attributes: ['sosy', 'unit', 'negation']
+          bool_attributes: ['negation']
+        - '@factory': eds.sentences
+          nlp: ${nlp}
+        - '@factory': eds.split
+          nlp: null
+          max_length: 2000
+          regex: '\n\n+'  # single-quoted, so the value is the literal text \n\n+
+    shuffle: dataset
+    batch_size: 8 docs
+    pipe_names: [ "ner" ]
+  - data:  # entry 2: same dataset for the "qualifier" pipe, without sentences/split steps
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:
+        - '@factory': eds.standoff_dict2doc  # default standoff to doc converter
+          span_setter: 'gold_spans'
+          span_attributes: ['sosy', 'unit', 'negation']
+          bool_attributes: ['negation']
+    shuffle: dataset
+    batch_size: 16 spans
+    pipe_names: [ "qualifier" ]
+
+val_data:  # validation reader; referenced by train.val_data as ${ val_data }
+  '@readers': standoff
+  path: tests/training/dataset/  # same directory as the train_data entries
+  converter:
+    - '@factory': eds.standoff_dict2doc  # default standoff to doc converter
+      span_setter: 'gold_spans'
+      span_attributes: ['sosy', 'unit', 'negation']
+      bool_attributes: ['negation']
+
+# 🚀 TRAIN SCRIPT OPTIONS
+train:  # options for the train script; most values interpolate the sections above
+  nlp: ${ nlp }
+  train_data: ${ train_data }
+  val_data: ${ val_data }
+  max_steps: 5  # also read by optimizer.total_steps
+  validation_interval: 2  # NOTE(review): presumably counted in steps - confirm
+  max_grad_norm: 1.0
+  scorer: ${ scorer }
+  num_workers: 0
+  optimizer: ${ optimizer }
+  cpu: true