Diff of /data/data_loader.py [000000] .. [9d22e8]

Switch to unified view

a b/data/data_loader.py
1
# data/data_loader.py
2
import pandas as pd
3
import numpy as np
4
from sklearn.model_selection import train_test_split
5
from sklearn.preprocessing import StandardScaler
6
import logging
7
from app.config import DATA_PATH, FEATURES, TARGET, TEST_SIZE, RANDOM_STATE
8
9
def load_data(file_path='data/patient_data.csv'):
    """
    Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        The loaded DataFrame, or None when anything goes wrong while
        reading (the error is printed instead of being raised).
    """
    loaded = None
    try:
        loaded = pd.read_csv(file_path)
        row_count = len(loaded)
        print(f"Data loaded successfully with {row_count} rows")  # Debug print
    except Exception as err:
        # Best-effort contract: report the problem and signal failure with None.
        print(f"Error loading data: {err}")
        loaded = None
    return loaded
20
21
def preprocess_data(df, *, target_column='readmitted', test_size=0.2,
                    random_state=42):
    """
    Split the data into train/test sets and standardize the features.

    The rows are split first and the scaler is fitted on the training split
    only; the test split is transformed with that already-fitted scaler.
    Fitting the scaler on the full dataset would leak test-set statistics
    (mean/variance) into the training features.

    Args:
        df: DataFrame holding the feature columns plus the target column,
            or None (for example when loading the data failed).
        target_column: Name of the column to separate out as the target.
        test_size: Fraction of the rows placed in the test split.
        random_state: Seed handed to the splitter so splits are reproducible.

    Returns:
        X_train, X_test, y_train, y_test, feature_names, scaler

        X_train and X_test are DataFrames of scaled features that keep the
        row index of the rows they were built from, so they stay aligned
        with y_train and y_test. When df is None or any step raises, the
        error is printed and six None values are returned instead.
    """
    failure = (None, None, None, None, None, None)
    print("Starting preprocessing...")  # Debug print

    if df is None:
        print("DataFrame is None")  # Debug print
        return failure

    try:
        # Separate features and target
        X = df.drop(target_column, axis=1)
        y = df[target_column]

        # Get feature names
        feature_names = X.columns.tolist()
        print(f"Features: {feature_names}")  # Debug print

        # Split BEFORE scaling so the scaler never sees the test rows.
        X_train_raw, X_test_raw, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state, stratify=y
        )

        # Fit on the training split only, then reuse the fitted scaler.
        scaler = StandardScaler()
        X_train = pd.DataFrame(
            scaler.fit_transform(X_train_raw),
            columns=feature_names,
            index=X_train_raw.index,
        )
        X_test = pd.DataFrame(
            scaler.transform(X_test_raw),
            columns=feature_names,
            index=X_test_raw.index,
        )

        print("Preprocessing completed successfully")  # Debug print

        # Explicitly create the return tuple
        result = (X_train, X_test, y_train, y_test, feature_names, scaler)
        print(f"Number of return values: {len(result)}")  # Debug print
        return result

    except Exception as e:
        # Callers unpack six values and check for None, so failures are
        # reported and signalled with a tuple of Nones rather than raised.
        print(f"Error in preprocessing: {e}")
        return failure