MedicalTrialClass / Git / Diff of /tests/model_file

Models:
joseph-gordon/
MedicalTrialClass
Downloads: 1
Diff of /tests/model_file_check.py [000000] .. [46c9de]
Switch to side-by-side view

--- a
+++ b/tests/model_file_check.py
@@ -0,0 +1,95 @@
+import joblib
+from pathlib import Path
+from sklearn.ensemble import VotingClassifier
+from sklearn.preprocessing import StandardScaler
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+
+def check_model_file():
+    """Check if model file exists and examine its contents"""
+    model_path = Path("../src/models/model.joblib")
+
+    # Check if file exists
+    if not model_path.exists():
+        print(f"Model file not found at {model_path}")
+        return
+
+    print(f"Model file found at {model_path}")
+
+    try:
+        # Load the model
+        model_data = joblib.load(model_path)
+        print("\nModel data type:", type(model_data))
+
+        # Check if it's a dictionary or direct model
+        if isinstance(model_data, dict):
+            print("\nModel is saved as a dictionary with components:")
+            for key, value in model_data.items():
+                print(f"- {key}: {type(value)}")
+        else:
+            print("\nModel is saved directly as:", type(model_data))
+
+        # Check for VotingClassifier
+        if isinstance(model_data, VotingClassifier) or (
+                isinstance(model_data, dict) and isinstance(model_data.get('model'), VotingClassifier)):
+            print("\nVotingClassifier found in model")
+            voting_clf = model_data if isinstance(model_data, VotingClassifier) else model_data['model']
+            print("- Estimators:", [est[0] for est in voting_clf.estimators])
+            print("- Voting:", voting_clf.voting)
+
+        # Check for scaler
+        if isinstance(model_data, dict) and 'scaler' in model_data:
+            print("\nScaler found in model:")
+            print("- Type:", type(model_data['scaler']))
+            if isinstance(model_data['scaler'], StandardScaler):
+                print("- Feature count:", model_data['scaler'].n_features_in_)
+
+        # Check for vectorizer
+        if isinstance(model_data, dict) and 'vectorizer' in model_data:
+            print("\nVectorizer found in model:")
+            print("- Type:", type(model_data['vectorizer']))
+            if isinstance(model_data['vectorizer'], TfidfVectorizer):
+                print("- Vocabulary size:", len(model_data['vectorizer'].vocabulary_) if model_data[
+                    'vectorizer'].vocabulary_ else "Not fitted")
+        else:
+            print("\nNo vectorizer found in model!")
+
+    except Exception as e:
+        print(f"\nError examining model file: {str(e)}")
+
+
+def check_model_structure():
+    """Print detailed model structure"""
+    model_path = Path("../src/models/model.joblib")
+
+    # Load the model
+    pipeline = joblib.load(model_path)
+
+    print("\nModel Pipeline Structure:")
+    print("=======================")
+
+    # Check all top-level keys
+    print("\nTop level keys:", list(pipeline.keys()))
+
+    # Check model structure
+    print("\nModel structure:")
+    model = pipeline['model']
+    if isinstance(model, dict):
+        print("Model is a dictionary containing:", list(model.keys()))
+    else:
+        print(f"Model is of type: {type(model)}")
+        print("Model attributes:", dir(model))
+
+    # Print other components
+    print("\nOther components:")
+    for key, value in pipeline.items():
+        if key != 'model':
+            print(f"{key}: {type(value)}")
+            if hasattr(value, 'get_params'):
+                print(f"Parameters: {value.get_params()}")
+
+
+if __name__ == "__main__":
+    check_model_file()
+    check_model_structure()
+