--- a +++ b/resources/lang.conf @@ -0,0 +1,113 @@ +#@meta {desc: 'linguistic parsing configuration', date: '2024-02-10'} + + +## spaCy token normalizers +# +# base NLP +[map_filter_token_normalizer] +class_name = zensols.nlp.MapTokenNormalizer +# add the split token mapper to have consistent token symmetry between the +# vanilla langres and mednlp_langres parsers as the MedCAT parser splits +# entities; see section `mednlp_map_filter_token_normalizer` +mapper_class_list = list: filter_token_mapper, split_ent_token_mapper + +# medical NLP +[mednlp_map_filter_token_normalizer] +class_name = zensols.nlp.MapTokenNormalizer +mapper_class_list = list: filter_token_mapper +embed_entities = False + + +## MedCAT resources +# NOTE(review): 'medcal_uml_groups' below may be a typo for 'medcat_umls_groups'; confirm against the section it references before renaming +[medcat_resource] +class_name = zensols.mednlp.MedCatResource +installer = instance: medcat_installer +vocab_resource = instance: medcat_vocab_resource +cdb_resource = instance: medcat_cdb_resource +mc_status_resource = instance: medcat_status_resource +umls_tuis = instance: medcat_umls_tuis +umls_groups = instance: medcal_uml_groups +requirements_dir = resource(zensols.mednlp): resources/requirements +cat_config = dict: + {'general': + {'spacy_model': '${mednlp_biomed_doc_parser:model_name}'}} + +[mednlp_library] +class_name = zensols.mednlp.MedicalLibrary +medcat_resource = instance: medcat_resource +# entity_linker_resource is optionally added in entlink.conf + + +## Base parser +# +# NLP parser override +[doc_parser] +# midsize model for standard NER +model_name = ${lang}_core_web_md +# install any missing spaCy models at runtime +auto_install_model = True + + +## Biomedical parsers +# +# ScispaCy parser +[mednlp_biomed_doc_parser] +class_name = zensols.nlp.sparser.SpacyFeatureDocumentParser +lang = ${doc_parser:lang} +# not very useful without using an entity recognizer model +# model_name = en_ner_bionlp13cg_md +# model_name = en_ner_jnlpba_md +model_name = ${lang}_core_sci_md +token_normalizer = instance: map_filter_token_normalizer 
+token_feature_ids = eval({'import': ['zensols.nlp as n']}): + n.FeatureToken.FEATURE_IDS +# install any missing spaCy models at runtime +auto_install_model = True + +# MedCAT features (CUIs, TUIs etc.) +[mednlp_medcat_doc_parser] +class_name = zensols.mednlp.MedCatFeatureDocumentParser +lang = ${doc_parser:lang} +model_name = ${doc_parser:model_name} +token_normalizer = instance: mednlp_map_filter_token_normalizer +medcat_resource = instance: medcat_resource +# set all features (override in your own configuration if you do not want them all) +token_feature_ids = eval({'import': ['zensols.nlp as n', 'zensols.mednlp as m']}): + (n.FeatureToken.FEATURE_IDS | m.MedicalFeatureToken.FEATURE_IDS) + + +## Combined parsers +# +# adds biomedical ScispaCy features (ent_) to the delegate (doc_parser) +[mednlp_combine_biomed_doc_parser] +class_name = zensols.nlp.combine.MappingCombinerFeatureDocumentParser +delegate = instance: doc_parser +source_parsers = instance: list: mednlp_biomed_doc_parser +# add the FeatureToken attribute regardless (setting currently commented out) +#overwrite_nones = False +# only entities are missing from the medical parser output +yield_features = set: ent_, ent, ent_iob, ent_iob_ +# only map at the token level instead of the sentence level +merge_sentences = False +token_feature_ids = ${mednlp_biomed_doc_parser:token_feature_ids} + +# adds MedCAT features (CUIs, TUIs etc.) 
to the delegate (doc_parser) +[mednlp_combine_medcat_doc_parser] +class_name = zensols.nlp.combine.MappingCombinerFeatureDocumentParser +delegate = instance: doc_parser +source_parsers = instance: list: mednlp_medcat_doc_parser +# add the FeatureToken attribute regardless +overwrite_nones = True +# features to overwrite: the medical feature IDs from zensols.mednlp +overwrite_features = eval({'import': ['zensols.mednlp as m']}): + m.MedicalFeatureToken.FEATURE_IDS +# only map at the token level instead of the sentence level +merge_sentences = False +token_feature_ids = ${mednlp_medcat_doc_parser:token_feature_ids} + +# adds both biomedical ScispaCy and MedCAT features +[mednlp_combine_biomed_medcat_doc_parser] +class_name = zensols.nlp.combine.MappingCombinerFeatureDocumentParser +delegate = instance: doc_parser +source_parsers = instance: list: mednlp_combine_biomed_doc_parser, mednlp_combine_medcat_doc_parser