# File: development/testcase/rasa_test.py
1
import os
2
import sys
3
import random
4
import string
5
import asyncio
6
import unittest
7
from rasa.model import get_model
8
from rasa.core.agent import Agent
9
from rasa.nlu.test import run_evaluation
10
from rasa.core.test import test as core_test
11
12
# Root of the production Rasa project, relative to the working directory.
path = './production/rasa-server/rasa/'

# Extend the import search path with the project root. This runs after the
# imports above, so it only affects modules imported later on
# (presumably project-local modules the model refers to - confirm).
sys.path.append(path)
14
15
16
class TestRasaMethods(unittest.TestCase):
    """F1-score quality gates for the trained Rasa model (NLU and Core).

    The model is unpacked and evaluated once in ``setUpClass``; every test
    method then only inspects the cached evaluation results. Running the
    evaluations there (instead of in the class body) keeps importing or
    discovering this module cheap and turns a broken model or data path into
    a regular test error instead of an import error.
    """

    # Directory containing the trained model archive(s).
    model_path = './production/rasa-server/rasa/models/'

    # Normal NLU test data
    test_data = './production/rasa-server/rasa/train_test_split/test_data.yml'
    # Typo NLU test data
    test_data_typo = './production/rasa-server/rasa/train_test_split/test_data_typo.yml'
    # Normal Core test data
    test_story = './production/rasa-server/rasa/tests/test_stories.yml'

    # Random name for the evaluation output directory under /tmp.
    tmp_name = ''.join(random.choices(
        string.ascii_uppercase + string.digits, k=10))

    # A score must be strictly greater than its threshold to pass.
    nlu_f1_threshold = 0.9
    core_f1_threshold = 0.8

    @classmethod
    def setUpClass(cls):
        """Unpack the model and run the NLU and Core evaluations once."""
        super().setUpClass()

        cls.unpacked_model = get_model(cls.model_path)

        # NLU TEST
        cls.nlu_model = os.path.join(cls.unpacked_model, "nlu")
        cls.test_result = cls._evaluate_nlu(
            cls.test_data, f'/tmp/{cls.tmp_name}')
        # Separate output directory so the typo run does not overwrite the
        # reports written by the run on the normal data.
        cls.test_result_typo = cls._evaluate_nlu(
            cls.test_data_typo, f'/tmp/{cls.tmp_name}_typo')

        # CORE TEST
        cls._agent = Agent.load(cls.unpacked_model)
        cls.test_result_core = asyncio.run(
            core_test(cls.test_story,
                      cls._agent,
                      e2e=False,
                      disable_plotting=True,
                      )
        )

    @classmethod
    def _evaluate_nlu(cls, data_path, output_directory):
        """Evaluate the NLU model on ``data_path`` and return the result dict.

        Successes and errors are written to ``output_directory``.
        """
        return asyncio.run(
            run_evaluation(data_path,
                           cls.nlu_model,
                           successes=True,
                           errors=True,
                           output_directory=output_directory,
                           disable_plotting=True,
                           report_as_dict=True,
                           )
        )

    def _assert_f1_above_threshold(self, result, section):
        """Assert that every f1_score in ``result[section]`` beats the threshold.

        ``result[section]`` is ``None`` when the pipeline has no matching
        component; the test is then reported as skipped rather than silently
        passing. When the section has no ``'report'`` key it is treated as a
        mapping of component name to evaluation, and every component is
        checked.
        """
        evaluation = result[section]
        if evaluation is None:
            self.skipTest(f'{section}: no matching component in the pipeline')

        if 'report' in evaluation:
            # Single component: the scores sit directly on the section.
            scores = {section: evaluation['f1_score']}
        else:
            # Multiple components: one nested evaluation per component.
            scores = {name: evaluation[name]['f1_score']
                      for name in evaluation}

        for name, f1_score in scores.items():
            # subTest reports every failing component, not just the first.
            with self.subTest(component=name):
                self.assertGreater(
                    f1_score, self.nlu_f1_threshold,
                    f'{section} [{name}]: f1_score {f1_score} is not above '
                    f'{self.nlu_f1_threshold}')

    # Test f1_score of intents
    def test_f1_intent(self):
        """Intent f1_score on the normal test data."""
        self._assert_f1_above_threshold(
            self.test_result, 'intent_evaluation')

    # Test f1_score of entities
    def test_f1_entity(self):
        """Entity f1_score on the normal test data."""
        self._assert_f1_above_threshold(
            self.test_result, 'entity_evaluation')

    # Test f1_score of response selectors
    def test_f1_response_selector(self):
        """Response selector f1_score on the normal test data."""
        self._assert_f1_above_threshold(
            self.test_result, 'response_selection_evaluation')

    # Test f1_score of intents - Typo contained data
    def test_f1_intent_typo(self):
        """Intent f1_score on the test data containing typos."""
        self._assert_f1_above_threshold(
            self.test_result_typo, 'intent_evaluation')

    # Test f1_score of entities - Typo contained data
    def test_f1_entity_typo(self):
        """Entity f1_score on the test data containing typos."""
        self._assert_f1_above_threshold(
            self.test_result_typo, 'entity_evaluation')

    # Test f1_score of response selectors - Typo contained data
    def test_f1_response_selector_typo(self):
        """Response selector f1_score on the test data containing typos."""
        self._assert_f1_above_threshold(
            self.test_result_typo, 'response_selection_evaluation')

    # Check f1_score of the rasa core - Test stories
    def test_f1_core(self):
        """Core f1 score on the test stories."""
        f1_score = self.test_result_core['f1']
        self.assertGreater(
            f1_score, self.core_f1_threshold,
            f'core: f1 {f1_score} is not above {self.core_f1_threshold}')
164
165
166
if __name__ == "__main__":
    # Executed as a script: hand control to the unittest runner, which
    # discovers and runs the test cases defined in this module.
    unittest.main()