tests/model_evaluation_test.py

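"""Unit tests for the model evaluation utilities.

Covers ModelEvaluator (cross-validation, confusion-matrix, ROC, error, and
performance analysis) and PerformanceBenchmark (multi-model benchmarking,
summary reporting, and plot generation).
"""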
import os
import shutil
import tempfile
import unittest
from pathlib import Path

import numpy as np
from sklearn.datasets import make_classification

from src.models.model_evaluation import ModelEvaluator, PerformanceBenchmark
from src.models.model_factory import ModelFactory, TextClassifier


class TestModelEvaluator(unittest.TestCase):
    def setUp(self):
        # Create synthetic data with three classes.
        X, y = make_classification(
            n_samples=100, n_features=20, n_informative=10, n_classes=3,
            random_state=42
        )
        self.X = X.astype(np.float32)
        self.y = y
        self.feature_names = [f"feature_{i}" for i in range(self.X.shape[1])]
        # Create and fit a classifier model using ModelFactory.
        self.model = ModelFactory.create_model(model_type='logistic_regression')
        self.model.fit(self.X, self.y)
        # Manually ensure that classes_ is assigned if it was not set during fit.
        if self.model.classes_ is None:
            self.model.classes_ = np.unique(self.y)
        # Instantiate the evaluator with 3-fold CV.
        self.evaluator = ModelEvaluator(n_splits=3, random_state=42)

    def test_perform_cross_validation(self):
        cv_metrics = self.evaluator._perform_cross_validation(self.model, self.X, self.y)
        self.assertIn('mean', cv_metrics)
        self.assertIn('std', cv_metrics)
        self.assertIn('confidence_intervals', cv_metrics)
        self.assertIsInstance(cv_metrics['mean']['accuracy'], float)

    def test_analyze_confusion_matrix(self):
        cm_analysis = self.evaluator._analyze_confusion_matrix(self.model, self.X, self.y)
        self.assertIn('confusion_matrix', cm_analysis)
        self.assertIn('overall_accuracy', cm_analysis)
        self.assertIsInstance(cm_analysis['confusion_matrix'], list)
        self.assertIsInstance(cm_analysis['overall_accuracy'], float)

    def test_perform_roc_analysis(self):
        # For logistic regression, predict_proba is available.
        roc_analysis = self.evaluator._perform_roc_analysis(self.model, self.X, self.y)
        # If the model supports predict_proba, roc_analysis should have these keys.
        self.assertIn('per_class_roc', roc_analysis)
        self.assertIn('micro_average', roc_analysis)
        self.assertIsInstance(roc_analysis['micro_average']['auc'], float)

    def test_perform_error_analysis(self):
        error_analysis = self.evaluator._perform_error_analysis(
            self.model, self.X, self.y, self.feature_names
        )
        self.assertIn('total_errors', error_analysis)
        self.assertIn('error_cases', error_analysis)
        # Even if the model is well trained, the error count may legitimately be zero.
        self.assertIsInstance(error_analysis['total_errors'], int)

    def test_calculate_performance_metrics(self):
        perf_metrics = self.evaluator._calculate_performance_metrics(self.model, self.X, self.y)
        self.assertIn('prediction_time', perf_metrics)
        self.assertIn('predictions_per_second', perf_metrics)
        self.assertIn('memory_usage', perf_metrics)

    def test_evaluate_model(self):
        # Evaluate the model using the public interface.
        results = self.evaluator.evaluate_model(self.model, self.X, self.y, self.feature_names)
        expected_keys = [
            'cross_validation_metrics',
            'confusion_matrix_analysis',
            'roc_analysis',
            'error_analysis',
            'performance_metrics'
        ]
        for key in expected_keys:
            self.assertIn(key, results, f"Expected key '{key}' is missing in evaluation results")


class TestPerformanceBenchmark(unittest.TestCase):
    def setUp(self):
        # Generate synthetic data.
        X, y = make_classification(
            n_samples=100, n_features=10, n_informative=5, n_classes=3,
            random_state=123
        )
        self.X = X.astype(np.float32)
        self.y = y
        self.feature_names = [f"feat_{i}" for i in range(self.X.shape[1])]
        # Create two models of different types.
        self.model1 = ModelFactory.create_model('logistic_regression')
        self.model1.fit(self.X, self.y)
        if self.model1.classes_ is None:
            self.model1.classes_ = np.unique(self.y)
        self.model2 = ModelFactory.create_model('svm')
        self.model2.fit(self.X, self.y)
        if self.model2.classes_ is None:
            self.model2.classes_ = np.unique(self.y)
        self.models = [self.model1, self.model2]
        # Instantiate the PerformanceBenchmark.
        self.benchmark = PerformanceBenchmark()

    def test_benchmark_models(self):
        benchmark_results = self.benchmark.benchmark_models(
            self.models, self.X, self.y, self.feature_names
        )
        self.assertIn('individual_results', benchmark_results)
        self.assertIn('comparisons', benchmark_results)
        self.assertIn('rankings', benchmark_results)
        for model_type, results in benchmark_results['individual_results'].items():
            self.assertIsInstance(results, dict)

    def test_generate_summary_report(self):
        benchmark_results = self.benchmark.benchmark_models(
            self.models, self.X, self.y, self.feature_names
        )
        summary = self.benchmark.generate_summary_report(benchmark_results)
        self.assertIsInstance(summary, str)
        self.assertIn("Model Benchmarking Summary Report", summary)

    def test_visualize_results(self):
        # Create a temporary directory for saving plots.
        output_dir = Path(tempfile.mkdtemp())
        benchmark_results = self.benchmark.benchmark_models(
            self.models, self.X, self.y, self.feature_names
        )
        self.benchmark.visualize_results(benchmark_results, output_dir)
        self.assertTrue((output_dir / 'accuracy_comparison.png').exists())
        self.assertTrue((output_dir / 'roc_comparison.png').exists())
        self.assertTrue((output_dir / 'resource_usage.png').exists())
        shutil.rmtree(output_dir)
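
# Note on test_visualize_results: it assumes the plots can be written without an
# interactive display. If PerformanceBenchmark.visualize_results uses matplotlib
# (an assumption -- the backend is not visible from this file), headless runs may
# need a non-interactive backend, e.g. by setting MPLBACKEND=Agg in the environment.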


if __name__ == '__main__':
    unittest.main(verbosity=2)
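
# Typical invocation, assuming the repository root contains both the src/ package
# and this tests/ directory and is the current working directory (an assumption
# about the project layout, not verified by this file):
#
#     PYTHONPATH=. python tests/model_evaluation_test.py
#
# which runs unittest.main above with verbosity=2.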