Diff of /resources/lang.conf [000000] .. [ca4dac]

Switch to unified view

a b/resources/lang.conf
1
#@meta {desc: 'linguistic parsing configuration', date: '2024-02-10'}
2
3
4
## spaCy token normalizers
5
#
6
# base NLP: token normalizer referenced by `mednlp_biomed_doc_parser:token_normalizer`
7
[map_filter_token_normalizer]
8
class_name = zensols.nlp.MapTokenNormalizer
9
# add the split token mapper to have consistent token symmetry between the
10
# vanilla langres and mednlp_langres parsers as the MedCAT parser splits
11
# entities; see section `mednlp_map_filter_token_normalizer`
12
mapper_class_list = list: filter_token_mapper, split_ent_token_mapper
13
14
# medical NLP: token normalizer referenced by `mednlp_medcat_doc_parser:token_normalizer`
15
[mednlp_map_filter_token_normalizer]
16
class_name = zensols.nlp.MapTokenNormalizer
17
# unlike `map_filter_token_normalizer`, no `split_ent_token_mapper` is listed here
mapper_class_list = list: filter_token_mapper
18
embed_entities = False
19
20
21
## MedCAT resources
22
#
23
[medcat_resource]
24
# the `instance:` targets below name sections that are not defined in this part
# of the file; they are presumably declared in another configuration file
class_name = zensols.mednlp.MedCatResource
25
installer = instance: medcat_installer
26
vocab_resource = instance: medcat_vocab_resource
27
cdb_resource = instance: medcat_cdb_resource
28
mc_status_resource = instance: medcat_status_resource
29
umls_tuis = instance: medcat_umls_tuis
30
# NOTE(review): `medcal_uml_groups` looks like a misspelling of
# `medcat_umls_groups`; confirm against the name of the section it refers to
# before renaming, since the referenced section may use the same spelling
umls_groups = instance: medcal_uml_groups
31
requirements_dir = resource(zensols.mednlp): resources/requirements
32
# the `spacy_model` entry is interpolated from `mednlp_biomed_doc_parser:model_name`
cat_config = dict:
33
  {'general':
34
    {'spacy_model': '${mednlp_biomed_doc_parser:model_name}'}}
35
36
[mednlp_library]
37
class_name = zensols.mednlp.MedicalLibrary
38
# refers to the `medcat_resource` section
medcat_resource = instance: medcat_resource
39
# `entity_linker_resource` is optionally added in entlink.conf
40
41
42
## Base parser
43
#
44
# NLP parser override: no `class_name` is given here, so these options
# presumably amend a `doc_parser` section defined elsewhere -- TODO confirm
45
[doc_parser]
46
# midsize model for standard NER; `${lang}` is not set among the options shown
# here and is presumably supplied by the base `doc_parser` section -- TODO confirm
47
model_name = ${lang}_core_web_md
48
# install any missing spaCy models at runtime
49
auto_install_model = True
50
51
52
## Biomedical parsers
53
#
54
# ScispaCy parser
55
[mednlp_biomed_doc_parser]
56
class_name = zensols.nlp.sparser.SpacyFeatureDocumentParser
57
# same language as configured for `doc_parser`
lang = ${doc_parser:lang}
58
# not very useful without using an entity recognizer model; the NER-specific
# model names below are currently disabled (commented out)
59
# model_name = en_ner_bionlp13cg_md
60
# model_name = en_ner_jnlpba_md
61
# this value is also interpolated into `medcat_resource:cat_config` as `spacy_model`
model_name = ${lang}_core_sci_md
62
token_normalizer = instance: map_filter_token_normalizer
63
# evaluated expression: `n.FeatureToken.FEATURE_IDS` with `zensols.nlp` imported as `n`
token_feature_ids = eval({'import': ['zensols.nlp as n']}):
64
  n.FeatureToken.FEATURE_IDS
65
# install any missing spaCy models at runtime
66
auto_install_model = True
67
68
# MedCAT features (CUIs, TUIs etc.)
69
[mednlp_medcat_doc_parser]
70
class_name = zensols.mednlp.MedCatFeatureDocumentParser
71
# language and model name are taken from the `doc_parser` section
lang = ${doc_parser:lang}
72
model_name = ${doc_parser:model_name}
73
token_normalizer = instance: mednlp_map_filter_token_normalizer
74
medcat_resource = instance: medcat_resource
75
# set all features, i.e. the union of `n.FeatureToken.FEATURE_IDS` and
# `m.MedicalFeatureToken.FEATURE_IDS` (override in your own configuration if
# you do not want them all)
76
token_feature_ids = eval({'import': ['zensols.nlp as n', 'zensols.mednlp as m']}):
77
  (n.FeatureToken.FEATURE_IDS | m.MedicalFeatureToken.FEATURE_IDS)
78
79
80
## Combined parsers
81
#
82
# adds biomedical ScispaCy features (ent_) to the delegate (doc_parser)
83
[mednlp_combine_biomed_doc_parser]
84
class_name = zensols.nlp.combine.MappingCombinerFeatureDocumentParser
85
delegate = instance: doc_parser
86
source_parsers = instance: list: mednlp_biomed_doc_parser
87
# `overwrite_nones` is left unset in this section (the line below is commented
# out); compare `mednlp_combine_medcat_doc_parser`, which sets it to True
88
#overwrite_nones = False
89
# only entities are missing from the medical parser output
90
yield_features = set: ent_, ent, ent_iob, ent_iob_
91
# only map token level instead of sentence
92
merge_sentences = False
93
# same feature IDs as the source parser `mednlp_biomed_doc_parser`
token_feature_ids = ${mednlp_biomed_doc_parser:token_feature_ids}
94
95
# adds MedCAT features (CUIs, TUIs etc.) to the delegate (doc_parser)
96
[mednlp_combine_medcat_doc_parser]
97
class_name = zensols.nlp.combine.MappingCombinerFeatureDocumentParser
98
delegate = instance: doc_parser
99
source_parsers = instance: list: mednlp_medcat_doc_parser
100
# add the FeatureToken attribute regardless
101
overwrite_nones = True
102
# features to overwrite: the medical feature IDs
# (`m.MedicalFeatureToken.FEATURE_IDS`, with `zensols.mednlp` imported as `m`)
103
overwrite_features = eval({'import': ['zensols.mednlp as m']}):
104
  m.MedicalFeatureToken.FEATURE_IDS
105
# only map token level instead of sentence
106
merge_sentences = False
107
# same feature IDs as the source parser `mednlp_medcat_doc_parser`
token_feature_ids = ${mednlp_medcat_doc_parser:token_feature_ids}
108
109
# adds both biomedical ScispaCy and MedCAT features
110
[mednlp_combine_biomed_medcat_doc_parser]
111
class_name = zensols.nlp.combine.MappingCombinerFeatureDocumentParser
112
delegate = instance: doc_parser
113
source_parsers = instance: list: mednlp_combine_biomed_doc_parser, mednlp_combine_medcat_doc_parser