tests/model_factory_test.py

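"""Unit tests for the ModelFactory and TextClassifier classes in src.models.model_factory."""
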
import shutil
import tempfile
import unittest
from pathlib import Path

import numpy as np
from sklearn.datasets import make_classification

from src.models.model_factory import ModelFactory, TextClassifier


class TestModelFactory(unittest.TestCase):
    """Test suite for ModelFactory implementation"""

    @classmethod
    def setUpClass(cls):
        """Set up test data once for all test methods"""
        # Create synthetic dataset for testing
        X, y = make_classification(
            n_samples=1000,
            n_features=20,
            n_informative=15,
            n_redundant=5,
            n_classes=5,
            random_state=42
        )
        # Cast features to float32 to reduce memory usage
        cls.X = X.astype(np.float32)
        cls.y = y
        # Create temporary directory for model saving/loading tests
        cls.temp_dir = Path(tempfile.mkdtemp())

    @classmethod
    def tearDownClass(cls):
        """Clean up after all tests are done"""
        # Remove temporary directory and its contents
        shutil.rmtree(cls.temp_dir)

    def test_supported_models(self):
        """Test getting supported model types"""
        supported_models = ModelFactory.get_supported_models()
        # Verify all required model types are supported
        expected_models = {
            'naive_bayes', 'logistic_regression', 'svm',
            'random_forest', 'bagging'
        }
        self.assertEqual(
            set(supported_models.keys()),
            expected_models,
            "Not all required model types are supported"
        )
        # Verify each model has required configuration
        for model_info in supported_models.values():
            self.assertIn('description', model_info)
            self.assertIn('default_params', model_info)

    def test_model_creation(self):
        """Test creating models of each type"""
        for model_type in ModelFactory.get_supported_models():
            # Create model with default configuration
            model = ModelFactory.create_model(model_type)
            # Verify model properties
            self.assertIsInstance(model, TextClassifier)
            self.assertEqual(model.model_type, model_type)
            # Verify model can be trained
            model.fit(self.X, self.y)
            # Verify predictions
            predictions = model.predict(self.X)
            self.assertEqual(predictions.shape, (len(self.X),))
            # Verify unique classes
            unique_classes = np.unique(predictions)
            self.assertEqual(
                len(unique_classes),
                len(np.unique(self.y)),
                f"Incorrect number of classes for {model_type}"
            )

    def test_custom_configuration(self):
        """Test model creation with custom configurations"""
        test_configs = {
            'naive_bayes': {'alpha': 0.5},
            'logistic_regression': {'C': 0.1, 'max_iter': 500},
            'svm': {'C': 2.0, 'kernel': 'linear'},
            'random_forest': {'n_estimators': 50, 'max_depth': 10},
            'bagging': {'n_estimators': 20, 'max_samples': 0.8}
        }
        for model_type, config in test_configs.items():
            # Create model with custom config
            model = ModelFactory.create_model(
                model_type=model_type,
                model_config=config
            )
            # Verify configuration was applied
            model_params = model.get_params()
            for param, value in config.items():
                self.assertIn(param, model_params['model_params'])
                self.assertEqual(
                    model_params['model_params'][param],
                    value,
                    f"Parameter {param} not set correctly for {model_type}"
                )

    def test_model_validation(self):
        """Test model configuration validation"""
        # Test valid configurations
        valid_config = {
            'naive_bayes': {'alpha': 1.0},
            'logistic_regression': {'C': 1.0, 'max_iter': 1000},
            'svm': {'C': 1.0, 'kernel': 'linear'},
            'random_forest': {'n_estimators': 100},
            'bagging': {'n_estimators': 100}
        }
        for model_type, config in valid_config.items():
            self.assertTrue(
                ModelFactory.validate_model_config(model_type, config),
                f"Valid configuration rejected for {model_type}"
            )
        # Test invalid configurations
        with self.assertRaises(ValueError):
            ModelFactory.validate_model_config(
                'naive_bayes',
                {'invalid_param': 1.0}
            )
        with self.assertRaises(ValueError):
            ModelFactory.validate_model_config(
                'invalid_model_type',
                {'param': 1.0}
            )

    def test_model_persistence(self):
        """Test model saving and loading"""
        for model_type in ModelFactory.get_supported_models():
            # Create and train model
            model = ModelFactory.create_model(model_type)
            model.fit(self.X, self.y)
            # Generate predictions before saving
            predictions_before = model.predict(self.X)
            # Save model
            save_path = self.temp_dir / f"{model_type}_model.joblib"
            model.save(save_path)
            # Load model
            loaded_model = ModelFactory.load_model(save_path)
            # Verify predictions match
            predictions_after = loaded_model.predict(self.X)
            np.testing.assert_array_equal(
                predictions_before,
                predictions_after,
                f"Predictions changed after save/load for {model_type}"
            )

    def test_ensemble_creation(self):
        """Test creating model ensembles"""
        # Test creating ensemble with default configurations
        model_types = ['naive_bayes', 'svm', 'random_forest']
        ensemble = ModelFactory.create_ensemble(model_types)
        # Verify ensemble properties
        self.assertEqual(len(ensemble), len(model_types))
        for model, expected_type in zip(ensemble, model_types):
            self.assertEqual(model.model_type, expected_type)
        # Test with custom configurations
        configs = [
            {'alpha': 0.5},
            {'C': 2.0, 'kernel': 'linear'},
            {'n_estimators': 50}
        ]
        ensemble = ModelFactory.create_ensemble(model_types, configs)
        # Verify configurations were applied
        for model, config in zip(ensemble, configs):
            model_params = model.get_params()
            for param, value in config.items():
                self.assertEqual(
                    model_params['model_params'][param],
                    value
                )

    def test_error_handling(self):
        """Test error handling in model factory"""
        # Test invalid model type
        with self.assertRaises(ValueError):
            ModelFactory.create_model('invalid_model_type')
        # Test invalid configuration
        with self.assertRaises(ValueError):
            ModelFactory.create_model(
                'naive_bayes',
                {'invalid_param': 1.0}
            )
        # Test invalid ensemble configuration
        with self.assertRaises(ValueError):
            ModelFactory.create_ensemble(
                ['naive_bayes', 'svm'],
                [{'alpha': 0.5}]  # Mismatched number of configurations
            )
        # Test loading non-existent model
        with self.assertRaises(FileNotFoundError):
            ModelFactory.load_model('non_existent_model.joblib')

    def test_model_evaluation(self):
        """Test model evaluation functionality"""
        # Split data into train and test sets
        train_size = int(0.8 * len(self.X))
        X_train = self.X[:train_size]
        y_train = self.y[:train_size]
        X_test = self.X[train_size:]
        y_test = self.y[train_size:]
        for model_type in ModelFactory.get_supported_models():
            # Create and train model
            model = ModelFactory.create_model(model_type)
            model.fit(X_train, y_train)
            # Evaluate model
            evaluation = model.evaluate(X_test, y_test)
            # Verify evaluation metrics
            self.assertIn('overall_metrics', evaluation)
            self.assertIn('per_class_metrics', evaluation)
            self.assertIn('confusion_matrix', evaluation)
            self.assertIn('error_analysis', evaluation)
            # Verify metric values are valid
            self.assertGreaterEqual(evaluation['overall_metrics']['accuracy'], 0.0)
            self.assertLessEqual(evaluation['overall_metrics']['accuracy'], 1.0)


if __name__ == '__main__':
    unittest.main(verbosity=2)