Switch to side-by-side view

--- a
+++ b/development/paraphrase/randomize entities.py
@@ -0,0 +1,67 @@
+import re
+from ruamel import yaml
+import numpy as np
+import sys
+
+
+def load_entities(entity_names):
+    entities_dict = dict()
+    for entity in entity_names:
+        entity_file_adr = f'./{entity}.yml'
+        entities_dict[entity] = load_entities_list(entity_file_adr)
+    return entities_dict
+ 
+
+def load_entities_list(file_adr):
+    with open(file_adr, 'r') as f:
+        entities = yaml.load(f, Loader=yaml.RoundTripLoader)
+    entities = entities['nlu'][0]
+    entities = entities['examples'].split('\n')
+    entities = [row[2:] for row in entities]
+    return entities
+
+
+def randomize_entity_names(nlu_dict, entities_dict):
+    for intent in nlu_dict['nlu']:
+        examples = ''
+        for example in intent['examples'].split('\n'):
+            entity_match = dict()
+            for entity in entities_dict:
+                entity_match = re.search(f'\[[^\]]*\]\({entity}\)', example)
+                if entity_match:
+                    start, end = entity_match.span()
+                    random_entity_name = f'[{np.random.choice(entities_dict[entity]).strip()}]({entity})'
+                    example = example.replace(example[start:end] , random_entity_name)
+                    
+            examples += example + '\n'
+        intent['examples'] = examples[:-1]  #removing last \n to avoid \n\n after the last example
+    return nlu_dict
+
+
+
+if __name__=='__main__':
+    
+    NLU_FILE = './nlu_cleaned.yml'
+    ENTITY_NAMES = ['drug', 'lab'] #lookup files should be in same directoty as this file
+    OUTPUT_FILE = 'nlu_random.yml'
+    #load files
+    with open(NLU_FILE, 'r') as f:
+        nlu = yaml.load(f, Loader=yaml.RoundTripLoader)
+    entities = load_entities(ENTITY_NAMES)
+
+    randomized_nlu = randomize_entity_names(nlu, entities)
+    #save
+    with open(OUTPUT_FILE, 'w') as f:
+        yaml.dump(randomized_nlu, f, Dumper=yaml.RoundTripDumper, default_flow_style=None)
+
+
+    
+    
+    
+
+    
+    
+    
+    
+    
+    
\ No newline at end of file