# NaBot / Git / [383a81] / development / testcase / rasa
#
# Models:
# philipB/
# NaBot
# Downloads: 1
# [383a81]: / development / testcase / rasa_test.py
# History
# Download this file
# 169 lines (148 with data), 7.3 kB

import os
import sys
import random
import string
import asyncio
import unittest
from rasa.model import get_model
from rasa.core.agent import Agent
from rasa.nlu.test import run_evaluation
from rasa.core.test import test as core_test

# Directory of the Rasa project; it is appended to the module search path.
# NOTE(review): presumably required so that project-local modules (e.g.
# custom components referenced by the model) can be imported when the
# model is loaded -- confirm against the project layout.
path = './production/rasa-server/rasa/'
sys.path.append(path)


class TestRasaMethods(unittest.TestCase):
    """Quality gate for the trained Rasa model.

    The model found under ``model_path`` is evaluated once per test run in
    :meth:`setUpClass`: its NLU part against the regular and the
    typo-containing test data, its Core part against the test stories.
    The individual tests only compare the reported F1 scores with a
    minimum threshold.

    The evaluations deliberately run in ``setUpClass`` rather than in the
    class body, so that merely importing/collecting this module does not
    load a model and so that a loading problem is reported as a test
    error instead of breaking test discovery.
    """

    model_path = './production/rasa-server/rasa/models/'
    # Random suffix keeping the report directory of this run apart from
    # the ones of other runs.
    tmp_name = ''.join(random.choices(
        string.ascii_uppercase + string.digits, k=10))

    # Normal NLU tests data
    test_data = './production/rasa-server/rasa/train_test_split/test_data.yml'
    # Typo NLU tests data
    test_data_typo = './production/rasa-server/rasa/train_test_split/test_data_typo.yml'
    # Normal Core test data
    test_story = './production/rasa-server/rasa/tests/test_stories.yml'

    # Scores must be strictly greater than these values.
    nlu_f1_threshold = 0.9
    core_f1_threshold = 0.8

    @classmethod
    def setUpClass(cls):
        """Load the model and run every evaluation exactly once.

        Populates ``unpacked_model``, ``nlu_model``, ``test_result``,
        ``test_result_typo``, ``_agent`` and ``test_result_core`` as class
        attributes for the test methods to read.
        """
        super().setUpClass()
        cls.unpacked_model = get_model(cls.model_path)
        cls.nlu_model = os.path.join(cls.unpacked_model, "nlu")

        # NLU TEST
        # Each data set gets its own sub-directory; with a shared one the
        # second evaluation would overwrite the reports of the first.
        output_root = f'/tmp/{cls.tmp_name}'
        cls.test_result = cls._evaluate_nlu(
            cls.test_data, os.path.join(output_root, 'nlu'))
        cls.test_result_typo = cls._evaluate_nlu(
            cls.test_data_typo, os.path.join(output_root, 'nlu_typo'))

        # CORE TEST
        cls._agent = Agent.load(cls.unpacked_model)
        cls.test_result_core = asyncio.run(core_test(cls.test_story,
                                                     cls._agent,
                                                     e2e=False,
                                                     disable_plotting=True,
                                                     ))

    @classmethod
    def _evaluate_nlu(cls, data_path, output_directory):
        """Evaluate the NLU model on ``data_path`` and return the result.

        Args:
            data_path: Path of the NLU test data file.
            output_directory: Directory handed to ``run_evaluation`` for
                its success/error reports.

        Returns:
            Whatever ``run_evaluation`` resolves to; the tests index it
            with the ``*_evaluation`` keys.
        """
        return asyncio.run(run_evaluation(data_path,
                                          cls.nlu_model,
                                          successes=True,
                                          errors=True,
                                          output_directory=output_directory,
                                          disable_plotting=True,
                                          report_as_dict=True,
                                          ))

    def _assert_f1_above(self, result, section, threshold):
        """Assert that every F1 score in ``result[section]`` beats ``threshold``.

        Args:
            result: NLU evaluation result (``test_result`` or
                ``test_result_typo``).
            section: Key of the evaluation to check, e.g.
                ``'intent_evaluation'``.
            threshold: Value each ``f1_score`` must strictly exceed.

        The test is skipped when the section is ``None``, i.e. the
        pipeline has no component producing it. A section carrying a
        ``'report'`` key is treated as the scores of a single component;
        otherwise it is treated as a mapping from component name to such
        scores and every entry is checked.
        """
        evaluation = result[section]
        if evaluation is None:
            self.skipTest(f'{section}: no such component in the pipeline')

        if 'report' in evaluation:
            self.assertGreater(evaluation['f1_score'], threshold, msg=section)
            return

        # Several components: check each one and report them separately.
        for component in evaluation:
            with self.subTest(section=section, component=component):
                self.assertGreater(
                    evaluation[component]['f1_score'], threshold)

    # Test f1_score of intents
    def test_f1_intent(self):
        self._assert_f1_above(
            self.test_result, 'intent_evaluation', self.nlu_f1_threshold)

    # Test f1_score of entities
    def test_f1_entity(self):
        self._assert_f1_above(
            self.test_result, 'entity_evaluation', self.nlu_f1_threshold)

    # Test f1_score of response selectors
    def test_f1_response_selector(self):
        self._assert_f1_above(
            self.test_result, 'response_selection_evaluation',
            self.nlu_f1_threshold)

    # Test f1_score of intents - Typo contained data
    def test_f1_intent_typo(self):
        self._assert_f1_above(
            self.test_result_typo, 'intent_evaluation',
            self.nlu_f1_threshold)

    # Test f1_score of entities - Typo contained data
    def test_f1_entity_typo(self):
        self._assert_f1_above(
            self.test_result_typo, 'entity_evaluation',
            self.nlu_f1_threshold)

    # Test f1_score of response selectors - Typo contained data
    def test_f1_response_selector_typo(self):
        self._assert_f1_above(
            self.test_result_typo, 'response_selection_evaluation',
            self.nlu_f1_threshold)

    # Check f1_score of the rasa core - Test stories
    def test_f1_core(self):
        self.assertGreater(
            self.test_result_core['f1'], self.core_f1_threshold)


if __name__ == "__main__":
    # Executed as a script: hand control to the unittest runner.
    unittest.main()