a b/development/rasa/synonym_generator.py
1
import ast
2
import pandas as pd
3
from tqdm import tqdm
4
from ruamel import yaml
5
6
7
class nlu_generator():
    """Generate an NLU synonym YAML document from a CSV dataset.

    Each usable CSV row becomes one ``synonym`` entry under the top-level
    ``nlu`` key; its ``examples`` block lists the alternative names found
    for that row.

    NOTE(review): the module-level ``yaml.load`` / ``yaml.dump`` calls used
    here are ruamel.yaml's legacy function API -- confirm the pinned
    ruamel.yaml version still provides them before upgrading.
    """

    def __init__(self, mode, med_dataset):
        """Load the dataset and pick the columns that match ``mode``.

        Args:
            mode: ``'drug'`` reads the ``drug_name`` / ``Brand names``
                columns; any other value reads ``Lab test`` /
                ``Also Known As``.
            med_dataset: Path or buffer accepted by ``pandas.read_csv``.
        """
        super().__init__()
        df = pd.read_csv(med_dataset)
        if mode == 'drug':
            col, aka_col = 'drug_name', 'Brand names'
        else:
            col, aka_col = 'Lab test', 'Also Known As'
        # Canonical names, lower-cased.
        self.list = df[col].str.lower()
        # Alternative names, lower-cased, with the registered-trademark
        # sign removed. regex=False makes this a literal replacement
        # regardless of the pandas version's default.
        self.aka_list = df[aka_col].str.lower().str.replace(
            '®', '', regex=False)
        self.mode = mode

    # Synonym key generator
    def __syn_generator(self, item, aka_list):
        """Build a one-entry ``nlu`` document for a single synonym.

        Args:
            item: Canonical name stored under ``synonym``.
            aka_list: Raw alias cell. In ``'lab'`` mode it is parsed with
                ``ast.literal_eval`` (presumably the text of a Python list
                of names -- confirm against the dataset); otherwise it is
                used as a single example.

        Returns:
            The loaded YAML mapping whose ``nlu`` list holds one entry.

        Raises:
            ValueError, SyntaxError: In ``'lab'`` mode, when the cell is
                not a valid Python literal.
        """
        # Local import: only this method needs the literal-style wrapper.
        from ruamel.yaml.scalarstring import LiteralScalarString

        # The template is fixed text; values are assigned afterwards so a
        # name containing YAML syntax (": ", "#", ...) cannot corrupt the
        # document or be coerced to a non-string type.
        inp = """\
nlu:
- synonym: ''
  examples: |
"""
        code = yaml.load(inp, Loader=yaml.RoundTripLoader)
        if self.mode == 'lab':
            aka_list = ast.literal_eval(aka_list)
        else:
            aka_list = [aka_list]
        entry = code['nlu'][0]
        entry['synonym'] = item
        # Plain str concatenation would return a plain str; wrapping the
        # result keeps the "|" literal block style from the template.
        entry['examples'] = LiteralScalarString(
            ''.join(f'- {aka}\n' for aka in aka_list))
        return code

    # First keys generator
    def __block_generator(self):
        """Return the document skeleton: ``version`` plus an empty ``nlu``."""
        inp = """\
version: "2.0"

nlu:
"""
        return yaml.load(inp, Loader=yaml.RoundTripLoader)

    # NaN check
    def __isnan(self, num):
        """Return True when ``num`` is a missing value (NaN, None, NA)."""
        return bool(pd.isna(num))

    # YAML generator
    def generate(self):
        """Build the full synonym document.

        Every row goes through the same missing-value check, including
        the first one; rows with a missing name or missing aliases are
        skipped.

        Returns:
            The YAML mapping with ``version`` and the ``nlu`` entry list
            (an empty list when no row is usable).
        """
        code = self.__block_generator()
        entries = None
        # zip pairs the two columns positionally, so the result does not
        # depend on the index labels of the underlying Series.
        rows = zip(self.list, self.aka_list)
        for item, aka in tqdm(rows, total=len(self.list)):
            if self.__isnan(item) or self.__isnan(aka):
                continue
            block = self.__syn_generator(item, aka)['nlu']
            if entries is None:
                # Reuse the first loaded sequence as the container so the
                # dumped list keeps its round-trip formatting.
                entries = block
            else:
                entries.append(block[0])
        code['nlu'] = entries if entries is not None else []
        return code

    # Write data in the disk
    def write_data(self, code, nlu_file):
        """Dump ``code`` as YAML to the path ``nlu_file``.

        The file is opened as UTF-8 so non-ASCII names do not depend on
        the platform's default encoding.
        """
        with open(nlu_file, 'w', encoding='utf-8') as f:
            yaml.dump(code, f, Dumper=yaml.RoundTripDumper)
71
72
73
def main():
    """Generate the synonym YAML file for the currently selected mode."""
    # Input CSV and output YAML per mode; change `mode` below to
    # regenerate the other file.
    targets = {
        'lab': (
            '../labtestonline_datasets/dataset_files/labtest_dataset.csv',
            'data/synonym_lab.yml',
        ),
        'drug': (
            '../medlineplus_drug_dataset/dataset_files/MedlinePlus_2.csv',
            'data/synonym_drug.yml',
        ),
    }
    mode = 'lab'
    dataset_path, output_path = targets[mode]
    builder = nlu_generator(mode, dataset_path)
    document = builder.generate()
    builder.write_data(document, output_path)
83
84
85
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()