Medical-NER / Git / [047299] /tokenizer

Models:

cathy-stones/

Medical-NER

Downloads: 1

[047299]: / tokenizer_config.json

History

Download this file

59 lines (58 with data), 1.3 kB

{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128000": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "[CLS]",
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_lower_case": false,
  "eos_token": "[SEP]",
  "mask_token": "[MASK]",
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "sp_model_kwargs": {},
  "split_by_punct": false,
  "tokenizer_class": "DebertaV2Tokenizer",
  "unk_token": "[UNK]",
  "vocab_type": "spm"
}