import shutil
import tempfile
import unittest
from pathlib import Path

import numpy as np
from sklearn.datasets import make_classification

from src.models.model_factory import ModelFactory, TextClassifier


class TestModelFactory(unittest.TestCase):
"""Test suite for ModelFactory implementation"""

    @classmethod
def setUpClass(cls):
"""Set up test data once for all test methods"""
# Create synthetic dataset for testing
X, y = make_classification(
n_samples=1000,
n_features=20,
n_informative=15,
n_redundant=5,
n_classes=5,
random_state=42
)
        # Cast features to float32 to reduce memory usage (make_classification returns float64)
cls.X = X.astype(np.float32)
cls.y = y
# Create temporary directory for model saving/loading tests
cls.temp_dir = Path(tempfile.mkdtemp())

    @classmethod
def tearDownClass(cls):
"""Clean up after all tests are done"""
# Remove temporary directory and its contents
shutil.rmtree(cls.temp_dir)

    def test_supported_models(self):
"""Test getting supported model types"""
supported_models = ModelFactory.get_supported_models()
# Verify all required model types are supported
expected_models = {
'naive_bayes', 'logistic_regression', 'svm',
'random_forest', 'bagging'
}
self.assertEqual(
set(supported_models.keys()),
expected_models,
"Not all required model types are supported"
)
# Verify each model has required configuration
for model_info in supported_models.values():
self.assertIn('description', model_info)
self.assertIn('default_params', model_info)
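            # Hedged structural check (an assumption, not stated by the factory
            # API above): default_params should be a dict of keyword arguments,
            # matching the custom configs used elsewhere in this suite.
            self.assertIsInstance(model_info['default_params'], dict)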

    def test_model_creation(self):
"""Test creating models of each type"""
for model_type in ModelFactory.get_supported_models():
# Create model with default configuration
model = ModelFactory.create_model(model_type)
# Verify model properties
self.assertIsInstance(model, TextClassifier)
self.assertEqual(model.model_type, model_type)
# Verify model can be trained
model.fit(self.X, self.y)
# Verify predictions
predictions = model.predict(self.X)
self.assertEqual(predictions.shape, (len(self.X),))
            # Verify the trained model predicts every class present in the labels
unique_classes = np.unique(predictions)
self.assertEqual(
len(unique_classes),
len(np.unique(self.y)),
f"Incorrect number of classes for {model_type}"
)
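            # Defensive extra check (an assumption beyond what the factory
            # promises): predicted labels should all come from the training
            # label set.
            self.assertTrue(
                set(unique_classes).issubset(set(np.unique(self.y))),
                f"Unexpected labels predicted by {model_type}"
            )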

    def test_custom_configuration(self):
"""Test model creation with custom configurations"""
test_configs = {
'naive_bayes': {'alpha': 0.5},
'logistic_regression': {'C': 0.1, 'max_iter': 500},
'svm': {'C': 2.0, 'kernel': 'linear'},
'random_forest': {'n_estimators': 50, 'max_depth': 10},
'bagging': {'n_estimators': 20, 'max_samples': 0.8}
}
for model_type, config in test_configs.items():
# Create model with custom config
model = ModelFactory.create_model(
model_type=model_type,
model_config=config
)
# Verify configuration was applied
model_params = model.get_params()
for param, value in config.items():
self.assertIn(param, model_params['model_params'])
self.assertEqual(
model_params['model_params'][param],
value,
f"Parameter {param} not set correctly for {model_type}"
)

    def test_model_validation(self):
"""Test model configuration validation"""
# Test valid configurations
valid_config = {
'naive_bayes': {'alpha': 1.0},
'logistic_regression': {'C': 1.0, 'max_iter': 1000},
'svm': {'C': 1.0, 'kernel': 'linear'},
'random_forest': {'n_estimators': 100},
'bagging': {'n_estimators': 100}
}
for model_type, config in valid_config.items():
self.assertTrue(
ModelFactory.validate_model_config(model_type, config),
f"Valid configuration rejected for {model_type}"
)
        # Test invalid configuration: unknown parameter name
with self.assertRaises(ValueError):
ModelFactory.validate_model_config(
'naive_bayes',
{'invalid_param': 1.0}
)
        # Test unknown model type
        with self.assertRaises(ValueError):
ModelFactory.validate_model_config(
'invalid_model_type',
{'param': 1.0}
)

    def test_model_persistence(self):
"""Test model saving and loading"""
for model_type in ModelFactory.get_supported_models():
# Create and train model
model = ModelFactory.create_model(model_type)
model.fit(self.X, self.y)
# Generate predictions before saving
predictions_before = model.predict(self.X)
# Save model
save_path = self.temp_dir / f"{model_type}_model.joblib"
model.save(save_path)
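            # Assumes save() writes the model to exactly the path it is given;
            # confirm the artifact exists on disk before reloading it.
            self.assertTrue(
                save_path.exists(),
                f"No model file written to {save_path} for {model_type}"
            )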
# Load model
loaded_model = ModelFactory.load_model(save_path)
# Verify predictions match
predictions_after = loaded_model.predict(self.X)
            np.testing.assert_array_equal(
                predictions_before,
                predictions_after,
                err_msg=f"Predictions changed after save/load for {model_type}"
            )

    def test_ensemble_creation(self):
"""Test creating model ensembles"""
# Test creating ensemble with default configurations
model_types = ['naive_bayes', 'svm', 'random_forest']
ensemble = ModelFactory.create_ensemble(model_types)
# Verify ensemble properties
self.assertEqual(len(ensemble), len(model_types))
for model, expected_type in zip(ensemble, model_types):
self.assertEqual(model.model_type, expected_type)
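            # Each ensemble member should itself be a TextClassifier, mirroring
            # the check in test_model_creation (assumed to hold for
            # create_ensemble as well).
            self.assertIsInstance(model, TextClassifier)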
# Test with custom configurations
configs = [
{'alpha': 0.5},
{'C': 2.0, 'kernel': 'linear'},
{'n_estimators': 50}
]
ensemble = ModelFactory.create_ensemble(model_types, configs)
# Verify configurations were applied
for model, config in zip(ensemble, configs):
model_params = model.get_params()
for param, value in config.items():
self.assertEqual(
model_params['model_params'][param],
value
)

    def test_error_handling(self):
"""Test error handling in model factory"""
# Test invalid model type
with self.assertRaises(ValueError):
ModelFactory.create_model('invalid_model_type')
# Test invalid configuration
with self.assertRaises(ValueError):
ModelFactory.create_model(
'naive_bayes',
{'invalid_param': 1.0}
)
# Test invalid ensemble configuration
with self.assertRaises(ValueError):
ModelFactory.create_ensemble(
['naive_bayes', 'svm'],
[{'alpha': 0.5}] # Mismatched number of configurations
)
# Test loading non-existent model
with self.assertRaises(FileNotFoundError):
ModelFactory.load_model('non_existent_model.joblib')

    def test_model_evaluation(self):
"""Test model evaluation functionality"""
# Split data into train and test sets
train_size = int(0.8 * len(self.X))
X_train = self.X[:train_size]
y_train = self.y[:train_size]
X_test = self.X[train_size:]
y_test = self.y[train_size:]
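        # Note: make_classification shuffles samples by default, so a sequential
        # split keeps the classes roughly balanced; train_test_split with
        # stratify=self.y would make that guarantee explicit.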
for model_type in ModelFactory.get_supported_models():
# Create and train model
model = ModelFactory.create_model(model_type)
model.fit(X_train, y_train)
# Evaluate model
evaluation = model.evaluate(X_test, y_test)
# Verify evaluation metrics
self.assertIn('overall_metrics', evaluation)
self.assertIn('per_class_metrics', evaluation)
self.assertIn('confusion_matrix', evaluation)
self.assertIn('error_analysis', evaluation)
# Verify metric values are valid
self.assertGreaterEqual(evaluation['overall_metrics']['accuracy'], 0.0)
self.assertLessEqual(evaluation['overall_metrics']['accuracy'], 1.0)
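            # Hedged extra check, assuming the confusion matrix is array-like
            # and square over the classes present in the test labels.
            confusion = np.asarray(evaluation['confusion_matrix'])
            n_classes = len(np.unique(y_test))
            self.assertEqual(
                confusion.shape,
                (n_classes, n_classes),
                f"Unexpected confusion matrix shape for {model_type}"
            )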


if __name__ == '__main__':
unittest.main(verbosity=2)