Switch to side-by-side view

--- a
+++ b/development/testcase/rasa_test.py
@@ -0,0 +1,168 @@
+import os
+import sys
+import random
+import string
+import asyncio
+import unittest
+from rasa.model import get_model
+from rasa.core.agent import Agent
+from rasa.nlu.test import run_evaluation
+from rasa.core.test import test as core_test
+
+path = './production/rasa-server/rasa/'  # relative to the current working directory
+sys.path.append(path)  # add that directory to the module search path
+
+
+class TestRasaMethods(unittest.TestCase):
+    """Fail when an f1 score of the trained Rasa model is too low.
+
+    The NLU model is evaluated on the regular test split and on a split
+    containing typos; Rasa Core is evaluated on the test stories.  All the
+    expensive work (unpacking the model, loading the agent, running the
+    evaluations) happens once in ``setUpClass`` rather than in the class
+    body, so importing or collecting this module does not by itself run an
+    evaluation, and a failure there is reported by the test runner as a
+    class set-up error instead of an import error.
+
+    Every path below is relative, so the tests have to be started from the
+    directory that contains ``production/``.
+    """
+
+    model_path = './production/rasa-server/rasa/models/'
+    # Random suffix so that separate runs write to separate report directories
+    tmp_name = ''.join(random.choices(
+        string.ascii_uppercase + string.digits, k=10))
+
+    # Normal NLU tests data
+    test_data = './production/rasa-server/rasa/train_test_split/test_data.yml'
+    # Typo NLU tests data
+    test_data_typo = './production/rasa-server/rasa/train_test_split/test_data_typo.yml'
+    # Normal Core test data
+    test_story = './production/rasa-server/rasa/tests/test_stories.yml'
+
+    # A score has to be strictly greater than its threshold to pass
+    nlu_threshold = 0.9
+    core_threshold = 0.8
+
+    @classmethod
+    def setUpClass(cls):
+        """Unpack the model and run the NLU and Core evaluations once.
+
+        Stores ``unpacked_model``, ``nlu_model``, ``test_result``,
+        ``test_result_typo``, ``_agent`` and ``test_result_core`` on the
+        class, where the test methods read them.
+        """
+        cls.unpacked_model = get_model(cls.model_path)
+        cls.nlu_model = os.path.join(cls.unpacked_model, "nlu")
+
+        report_dir = f'/tmp/{cls.tmp_name}'
+        cls.test_result = cls._evaluate_nlu(cls.test_data, report_dir)
+        # The typo run gets its own directory: sharing one let its
+        # success/error reports replace those of the normal run.
+        cls.test_result_typo = cls._evaluate_nlu(
+            cls.test_data_typo, f'{report_dir}_typo')
+
+        cls._agent = Agent.load(cls.unpacked_model)
+        cls.test_result_core = asyncio.run(core_test(cls.test_story,
+                                                     cls._agent,
+                                                     e2e=False,
+                                                     disable_plotting=True,
+                                                     ))
+
+    @classmethod
+    def _evaluate_nlu(cls, data_path, output_directory):
+        """Run the NLU evaluation on one data file and return its result.
+
+        Args:
+            data_path: Path of the NLU test data to evaluate on.
+            output_directory: Directory handed to ``run_evaluation`` for
+                its success and error reports.
+
+        Returns:
+            The awaited result of ``run_evaluation``; the tests index it
+            with ``'intent_evaluation'``, ``'entity_evaluation'`` and
+            ``'response_selection_evaluation'``.
+        """
+        return asyncio.run(run_evaluation(data_path,
+                                          cls.nlu_model,
+                                          successes=True,
+                                          errors=True,
+                                          output_directory=output_directory,
+                                          disable_plotting=True,
+                                          report_as_dict=True,
+                                          ))
+
+    def _check_f1(self, results, section, threshold):
+        """Assert that each f1 score of one evaluation section beats a threshold.
+
+        Args:
+            results: Result dict of an NLU evaluation.
+            section: Key of the evaluation to check, e.g. ``'intent_evaluation'``.
+            threshold: Value every f1 score has to exceed.
+
+        Raises:
+            unittest.SkipTest: If ``results[section]`` is ``None`` (no such
+                component in the pipeline); the test is then reported as
+                skipped instead of passing without having checked anything.
+        """
+        evaluation = results[section]
+        if evaluation is None:
+            self.skipTest(f'{section} is None: no such component in the pipeline')
+        if 'report' in evaluation:
+            # One component: the scores sit directly in the section
+            scores = {section: evaluation['f1_score']}
+        else:
+            # Several components: one entry, holding its own scores, for each
+            scores = {name: entry['f1_score']
+                      for name, entry in evaluation.items()}
+        for name, f1_score in scores.items():
+            # subTest reports every failing component, not only the first one
+            with self.subTest(component=name):
+                self.assertGreater(f1_score, threshold)
+
+    # Test f1_score of intents
+    def test_f1_intent(self):
+        """Intent f1 on the normal test data exceeds the NLU threshold."""
+        self._check_f1(self.test_result, 'intent_evaluation',
+                       self.nlu_threshold)
+
+    # Test f1_score of entities
+    def test_f1_entity(self):
+        """Entity f1 on the normal test data exceeds the NLU threshold."""
+        self._check_f1(self.test_result, 'entity_evaluation',
+                       self.nlu_threshold)
+
+    # Test f1_score of response selectors
+    def test_f1_response_selector(self):
+        """Response selector f1 on the normal test data exceeds the NLU threshold."""
+        self._check_f1(self.test_result, 'response_selection_evaluation',
+                       self.nlu_threshold)
+
+    # Test f1_score of intents - Typo contained data
+    def test_f1_intent_typo(self):
+        """Intent f1 on the typo test data exceeds the NLU threshold."""
+        self._check_f1(self.test_result_typo, 'intent_evaluation',
+                       self.nlu_threshold)
+
+    # Test f1_score of entities - Typo contained data
+    def test_f1_entity_typo(self):
+        """Entity f1 on the typo test data exceeds the NLU threshold."""
+        self._check_f1(self.test_result_typo, 'entity_evaluation',
+                       self.nlu_threshold)
+
+    # Test f1_score of response selectors - Typo contained data
+    def test_f1_response_selector_typo(self):
+        """Response selector f1 on the typo test data exceeds the NLU threshold."""
+        self._check_f1(self.test_result_typo, 'response_selection_evaluation',
+                       self.nlu_threshold)
+
+    # Check f1_score of the rasa core - Test stories
+    def test_f1_core(self):
+        """Core f1 on the test stories exceeds the Core threshold."""
+        f1_score = self.test_result_core['f1']
+        self.assertGreater(f1_score, self.core_threshold)
+
+
+if __name__ == '__main__':
+    # Run every test in this module when the file is executed as a script
+    unittest.main()