Diff of /code/ml_project.py [000000] .. [ecb694]


--- a
+++ b/code/ml_project.py
@@ -0,0 +1,788 @@
+# -*- coding: utf-8 -*-
+"""ML_Project.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1N-OfEL_dUBWC58ZTYK4NUEUkakSCj2rS
+"""
+
+import pandas as pd
+
+# Load the data from the CSV file
+data = pd.read_csv('/content/Dataaa.csv')
+
+# Display the number of features
+print("Number of features in the dataset:", data.shape[1])
+print("Names of the features:", data.columns.tolist())
+
+# Display the first few lines of the CSV file to understand its content
+with open('/content/Dataaa.csv', 'r') as file:
+    for _ in range(5):
+        print(file.readline())
+
+import numpy as np
+import pandas as pd
+
+# Assuming your data is already loaded into NumPy arrays:
+# X_in holds the input samples and X_out holds the output targets.
+# The arrays below are placeholders; replace them with your actual data loading code.
+X_in = np.random.rand(100, 10)   # Example: 100 samples, 10 features
+X_out = np.random.rand(100, 3)   # Example: 100 samples, 3 output targets
+
+# Convert the NumPy arrays to pandas DataFrames
+X_in_df = pd.DataFrame(X_in)
+X_out_df = pd.DataFrame(X_out)
+
+# Concatenate both DataFrames along the columns
+data_df = pd.concat([X_in_df, X_out_df], axis=1)
+
+# Save the combined DataFrame to a CSV file
+data_df.to_csv('/content/corrected_data.csv', index=False)
+
+import numpy as np
+import pandas as pd
+
+# Example data (replace with your actual data arrays)
+X_in = np.random.rand(100, 10)  # 100 samples, 10 features
+X_out = np.random.rand(100, 1)  # 100 samples, 1 target
+
+# Convert to DataFrame
+df_in = pd.DataFrame(X_in, columns=[f'feature_{i}' for i in range(X_in.shape[1])])
+df_out = pd.DataFrame(X_out, columns=['target'])
+
+# Combine input and output data
+full_df = pd.concat([df_in, df_out], axis=1)
+
+# Save to CSV
+full_df.to_csv('/content/corrected_data.csv', index=False)
+
+import pandas as pd
+
+# Load the data from the corrected CSV file
+data = pd.read_csv('/content/corrected_data.csv')
+
+# Display the first few rows of the dataset and the shape to verify
+print(data.head())
+print("Shape of the dataset:", data.shape)
+print("Column names:", data.columns.tolist())
+
+## Normalizing and splitting the data
+
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+
+# Selecting input features and target
+X = data.iloc[:, :-1].values  # All columns except the last are features
+y = data.iloc[:, -1].values  # Last column is the target
+
+# Normalize the input data
+scaler = StandardScaler()
+X_normalized = scaler.fit_transform(X)
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.3, random_state=42)
+
+# Output the shapes of the datasets to verify everything is as expected
+print("Train data shape:", X_train.shape)
+print("Test data shape:", X_test.shape)
+
+## Training the RNN model
+
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, SimpleRNN
+
+# Define the RNN model
+model = Sequential([
+    SimpleRNN(50, input_shape=(X_train.shape[1], 1)),  # 50 RNN units, considering each feature as a time step
+    Dense(1)  # Output layer with one neuron for regression output (the target)
+])
+
+# Compile the model
+model.compile(optimizer='adam', loss='mean_squared_error')
+
+# Reshape input for RNN which expects (batch_size, timesteps, features)
+X_train_rnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
+X_test_rnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
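+
+# Quick shape check (illustrative): the RNN expects 3-D input of (samples, timesteps, features)
+print("X_train_rnn shape:", X_train_rnn.shape)
+print("X_test_rnn shape:", X_test_rnn.shape)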
+
+# Train the model
+history = model.fit(X_train_rnn, y_train, epochs=100, validation_data=(X_test_rnn, y_test))
+
+# Optionally, plot the training and validation loss
+import matplotlib.pyplot as plt
+
+plt.plot(history.history['loss'], label='train')
+plt.plot(history.history['val_loss'], label='test')
+plt.title('Model Loss')
+plt.ylabel('Loss')
+plt.xlabel('Epoch')
+plt.legend()
+plt.show()
+
+model.save('/content/my_rnn_model.h5')  # Saves the model for later use
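+
+# Optional sanity check (a minimal sketch): reload the saved file to confirm it is usable.
+# load_model restores both the architecture and the trained weights; 'reloaded_model' is
+# just an illustrative name.
+from tensorflow.keras.models import load_model
+reloaded_model = load_model('/content/my_rnn_model.h5')
+reloaded_model.summary()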
+
+# Example threshold value - you need to choose what makes sense for your data
+threshold = 0.5
+
+# Convert continuous target data to binary classification
+y_train_class = (y_train > threshold).astype(int)
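+
+# A quick sketch for sanity-checking the threshold choice (assumes y_train holds the
+# continuous target): inspect its spread and the resulting class balance.
+print("Target percentiles (25/50/75):", np.percentile(y_train, [25, 50, 75]))
+print("Class balance at threshold", threshold, ":", np.bincount(y_train_class))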
+
+# Proceed with the rest of your RNN setup as before
+
+## Classifying the presence of damage
+
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import SimpleRNN, Dense
+
+# Define the classification model
+classification_model = Sequential([
+    SimpleRNN(50, input_shape=(X_train.shape[1], 1)),  # Adjust the input shape and units as necessary
+    Dense(1, activation='sigmoid')  # Sigmoid activation for binary classification
+])
+
+# Compile the classification model
+classification_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Reshape data for RNN
+X_train_rnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
+
+# Train the classification model
+classification_model.fit(X_train_rnn, y_train_class, epochs=100, validation_split=0.2)
+
+# Save the classification model
+classification_model.save('/content/classification_model.h5')
+
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc
+
+# The original test labels in y_test are still continuous; apply the same threshold
+# that was used for the training data.
+y_test_class = (y_test > threshold).astype(int)
+
+# Predict class probabilities on the test set and binarize them with the same threshold
+y_pred_probs = classification_model.predict(X_test_rnn)
+y_pred_class = (y_pred_probs > threshold).astype(int)
+
+# Now compute and plot the confusion matrix
+cm = confusion_matrix(y_test_class, y_pred_class)
+disp = ConfusionMatrixDisplay(confusion_matrix=cm)
+disp.plot(cmap=plt.cm.Blues)
+plt.title('Confusion Matrix')
+plt.show()
+
+# And compute the ROC curve
+fpr, tpr, _ = roc_curve(y_test_class, y_pred_probs.ravel())
+roc_auc = auc(fpr, tpr)
+
+# Plot the ROC curve
+plt.figure()
+plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
+plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('Receiver Operating Characteristic')
+plt.legend(loc="lower right")
+plt.show()
+
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense, Dropout
+from sklearn.utils.class_weight import compute_class_weight
+from sklearn.metrics import precision_recall_curve
+
+# Calculate class weights
+class_weights = compute_class_weight('balanced', classes=np.unique(y_train_class), y=y_train_class)
+class_weights_dict = dict(enumerate(class_weights))
+
+# Build an LSTM model
+model = Sequential([
+    LSTM(100, input_shape=(X_train.shape[1], 1), return_sequences=True),
+    Dropout(0.5),
+    LSTM(100),
+    Dropout(0.5),
+    Dense(1, activation='sigmoid')
+])
+
+# Compile the model
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Train the model with class weights
+history = model.fit(X_train_rnn, y_train_class, epochs=100, validation_split=0.2, class_weight=class_weights_dict)
+
+# Predict probabilities
+y_pred_probs = model.predict(X_test_rnn)
+
+# Find the optimal threshold based on precision-recall tradeoff
+precision, recall, thresholds = precision_recall_curve(y_test_class, y_pred_probs)
+# Convert to F1 score; precision and recall have one more entry than thresholds, so drop
+# the final point, and add a small epsilon to avoid division by zero
+fscore = (2 * precision[:-1] * recall[:-1]) / (precision[:-1] + recall[:-1] + 1e-8)
+# Locate the index of the largest F1 score
+ix = np.argmax(fscore)
+optimal_threshold = thresholds[ix]
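+
+# Report the chosen operating point (a small sketch using only the values computed above)
+print(f"Optimal threshold: {optimal_threshold:.3f}, F1 at that threshold: {fscore[ix]:.3f}")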
+
+# Use the optimal threshold to convert probabilities to binary predictions
+y_pred_class = (y_pred_probs > optimal_threshold).astype(int)
+
+# Recompute the confusion matrix and ROC curve using the new threshold
+
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc
+import matplotlib.pyplot as plt
+
+# Use the optimal threshold to convert probabilities to binary predictions
+y_pred_class_optimal = (y_pred_probs > optimal_threshold).astype(int)
+
+# Compute the confusion matrix using the optimal threshold
+cm_optimal = confusion_matrix(y_test_class, y_pred_class_optimal)
+
+# Display the confusion matrix
+disp = ConfusionMatrixDisplay(confusion_matrix=cm_optimal)
+disp.plot(cmap=plt.cm.Blues)
+plt.title('Confusion Matrix with Optimal Threshold')
+plt.show()
+
+# Compute ROC curve and AUC
+fpr, tpr, _ = roc_curve(y_test_class, y_pred_probs)
+roc_auc = auc(fpr, tpr)
+
+# Plot ROC curve
+plt.figure()
+plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
+plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('Receiver Operating Characteristic with Optimal Threshold')
+plt.legend(loc="lower right")
+plt.show()
+
+# Make sure the 'y_test_class' and 'y_train_class' variables are set correctly before this step.
+# If the original labels in 'y_test' are stored as strings, map 'damage' to 1 and
+# 'no damage' to 0; if they are continuous values, keep the threshold-based y_test_class
+# computed above instead.
+y_test_class = np.where(y_test == 'damage', 1, 0)
+
+# Now, use your model to predict probabilities on the test set
+# Assuming 'X_test_rnn' is already defined and shaped correctly
+y_pred_probs = classification_model.predict(X_test_rnn)
+
+# Choose a decision threshold (if you've found an optimal one, use that, otherwise use 0.5)
+threshold = 0.5  # or optimal_threshold if you have one
+
+# Convert predicted probabilities into binary class predictions
+y_pred_class = (y_pred_probs > threshold).astype(int)
+
+# Compute the confusion matrix
+cm = confusion_matrix(y_test_class, y_pred_class)
+
+# Plot the confusion matrix
+disp = ConfusionMatrixDisplay(confusion_matrix=cm)
+disp.plot(cmap=plt.cm.Blues)
+plt.title('Confusion Matrix')
+plt.show()
+
+# Calculate the ROC curve and AUC
+fpr, tpr, _ = roc_curve(y_test_class, y_pred_probs)
+roc_auc = auc(fpr, tpr)
+
+# Plot the ROC curve
+plt.figure()
+plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
+plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('Receiver Operating Characteristic')
+plt.legend(loc='lower right')
+plt.show()
+
+# Reshape data from 2D to 3D (samples, timesteps, features)
+X_train_rnn = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
+
+# Define the LSTM model for binary classification
+lstm_model = Sequential([
+    LSTM(50, input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])),  # Correct input_shape
+    Dense(1, activation='sigmoid')
+])
+
+# Compile the LSTM model
+lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Train the LSTM model
+history = lstm_model.fit(X_train_rnn, y_train_class, epochs=100, validation_split=0.2)
+
+# ... (Continue with the rest of the code as before)
+
+# Assuming your data is correctly reshaped to 3D for LSTM and 'y_train_class' holds the binary labels
+
+# Continue training the LSTM model
+history = lstm_model.fit(
+    X_train_rnn,
+    y_train_class,
+    epochs=100,
+    validation_split=0.2
+)
+
+# Assuming 'X_test' and 'y_test' are your test data and labels, respectively
+# Reshape the test data to match the input shape of the model
+X_test_rnn = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
+
+# Predict class probabilities on the test set
+y_pred_probs = lstm_model.predict(X_test_rnn)
+
+# Choose a decision threshold
+threshold = 0.5  # Adjust based on your optimal threshold
+y_pred_class = (y_pred_probs > threshold).astype(int)
+
+# Compute the confusion matrix
+cm = confusion_matrix(y_test_class, y_pred_class)
+
+# Display the confusion matrix
+disp = ConfusionMatrixDisplay(confusion_matrix=cm)
+disp.plot(cmap=plt.cm.Blues)
+plt.title('Confusion Matrix - LSTM Model')
+plt.show()
+
+# Compute ROC curve and AUC
+fpr, tpr, _ = roc_curve(y_test_class, y_pred_probs)
+roc_auc = auc(fpr, tpr)
+
+# Plot ROC curve
+plt.figure()
+plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
+plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('ROC Curve - LSTM Model')
+plt.legend(loc="lower right")
+plt.show()
+
+# Check the range of predicted probabilities
+print("Predicted probabilities:", y_pred_probs)
+
+# Check if there are both classes present in the test labels
+print("Unique labels in y_test_class:", np.unique(y_test_class))
+
+# Check unique values in the labels array
+unique_classes = np.unique(y)
+print("Unique classes in y:", unique_classes)
+
+# Define a threshold to convert continuous values to binary classification
+threshold = 0.5  # This is just an example, adjust this based on your domain knowledge
+y_class = (y > threshold).astype(int)
+
+# Now check the distribution of the new binary labels
+print("Distribution of binary labels:", np.bincount(y_class))
+
+# Continue with the train-test split with the new binary labels
+X_train, X_test, y_train_class, y_test_class = train_test_split(
+    X, y_class,
+    test_size=0.2,
+    stratify=y_class,
+    random_state=42
+)
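+
+# Quick check (illustrative only) that the stratified split preserved the class balance
+print("Train class counts:", np.bincount(y_train_class))
+print("Test class counts:", np.bincount(y_test_class))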
+
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import confusion_matrix, roc_curve, auc, ConfusionMatrixDisplay
+import matplotlib.pyplot as plt
+
+# Perform the stratified train-test split with the new binary labels
+X_train, X_test, y_train_class, y_test_class = train_test_split(
+    X, y_class,
+    test_size=0.2,
+    stratify=y_class,
+    random_state=42
+)
+
+# Train the logistic regression model
+logistic_model = LogisticRegression()
+logistic_model.fit(X_train, y_train_class)
+
+# Predict class probabilities on the test set
+y_pred_probs_logistic = logistic_model.predict_proba(X_test)[:, 1]
+
+# Predict class labels for the test set based on the default threshold of 0.5
+y_pred_class_logistic = logistic_model.predict(X_test)
+
+# Compute the confusion matrix
+cm_logistic = confusion_matrix(y_test_class, y_pred_class_logistic)
+
+# Display the confusion matrix
+disp = ConfusionMatrixDisplay(confusion_matrix=cm_logistic)
+disp.plot(cmap=plt.cm.Blues)
+plt.title('Confusion Matrix - Logistic Regression Model')
+plt.show()
+
+# Compute ROC curve and AUC
+fpr_logistic, tpr_logistic, _ = roc_curve(y_test_class, y_pred_probs_logistic)
+roc_auc_logistic = auc(fpr_logistic, tpr_logistic)
+
+# Plot the ROC curve
+plt.figure()
+plt.plot(fpr_logistic, tpr_logistic, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc_logistic)
+plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('ROC Curve - Logistic Regression Model')
+plt.legend(loc="lower right")
+plt.show()
+
+# Reshape X_test_rnn to the classifier's expected input shape (10 time steps, 1 feature
+# per step); the hard-coded 10 assumes the dataset has 10 input features.
+X_test_rnn_reshaped = X_test_rnn.reshape((X_test_rnn.shape[0], 10, 1))
+
+# Make predictions with the reshaped data, using the SimpleRNN classifier trained above
+y_pred_probs_rnn = classification_model.predict(X_test_rnn_reshaped)
+
+# Continue with the rest of the code for evaluation
+
+print("Number of samples in test set:", y_test_class.shape[0])
+print("Number of predictions made:", y_pred_probs_rnn.shape[0])
+
+
+
+
+
+## Using an LSTM to better predict
+
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense
+
+# Define the LSTM model for binary classification
+lstm_classification_model = Sequential([
+    LSTM(2, input_shape=(X_train.shape[1], 1)),  # 2 LSTM units; increase for more capacity
+    Dense(1, activation='sigmoid')  # Sigmoid activation for binary classification
+])
+
+# Compile the LSTM model
+lstm_classification_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Train the LSTM model
+history = lstm_classification_model.fit(
+    X_train_rnn, y_train_class,
+    epochs=100,
+    validation_split=0.2
+)
+
+# Save the LSTM classification model
+lstm_classification_model.save('/content/lstm_classification_model.h5')
+
+# Model Evaluation
+
+# y_test_binary holds the binarized test labels; reuse the thresholded labels from above
+y_test_binary = y_test_class
+
+# Evaluate the model's performance
+test_loss, test_accuracy = lstm_classification_model.evaluate(X_test_rnn, y_test_binary)
+
+# Print the evaluation results
+print(f"Test Loss: {test_loss}")
+print(f"Test Accuracy: {test_accuracy}")
+
+# Regularizing to make the LSTM better
+
+from sklearn.utils.class_weight import compute_class_weight
+
+# Calculate class weights for unbalanced datasets
+class_weights = compute_class_weight(
+    class_weight='balanced',
+    classes=np.unique(y_train_class),
+    y=y_train_class
+)
+
+# Create a dictionary mapping class labels to weights
+class_weight_dict = {0: class_weights[0], 1: class_weights[1]}
+
+# Train the model with class weight to handle imbalance
+history = lstm_classification_model.fit(
+    X_train_rnn, y_train_class,
+    epochs=100,
+    validation_split=0.2,
+    class_weight=class_weight_dict  # Use the computed class weights
+)
+
+# Plot the training history
+plt.figure(figsize=(14, 5))
+
+# Plot training & validation accuracy values
+plt.subplot(1, 2, 1)
+plt.plot(history.history['accuracy'])
+plt.plot(history.history['val_accuracy'])
+plt.title('Model accuracy')
+plt.xlabel('Epoch')
+plt.ylabel('Accuracy')
+plt.legend(['Train', 'Validation'], loc='upper left')
+
+# Plot training & validation loss values
+plt.subplot(1, 2, 2)
+plt.plot(history.history['loss'])
+plt.plot(history.history['val_loss'])
+plt.title('Model loss')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.legend(['Train', 'Validation'], loc='upper left')
+
+plt.show()
+
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense, Dropout
+from sklearn.utils.class_weight import compute_class_weight
+
+# Calculate class weights for unbalanced datasets
+classes = np.unique(y_train_class)
+class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train_class)
+class_weights_dict = dict(zip(classes, class_weights))
+
+# Define the LSTM model with dropout for regularization
+model = Sequential([
+    LSTM(30, input_shape=(X_train.shape[1], 1), dropout=0.2, recurrent_dropout=0.2),
+    Dropout(0.5),
+    Dense(1, activation='sigmoid')
+])
+
+# Compile the model (default Adam learning rate; consider lowering it if training is unstable)
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Train the model with class weights to handle imbalance
+history = model.fit(
+    X_train_rnn, y_train_class,
+    epochs=50,
+    validation_split=0.2,
+    class_weight=class_weights_dict,
+    batch_size=32  # Consider trying different batch sizes
+)
+
+# Evaluate the model to see if the performance has improved
+test_loss, test_accuracy = model.evaluate(X_test_rnn, y_test_binary)
+print(f"Test Loss: {test_loss}")
+print(f"Test Accuracy: {test_accuracy}")
+
+# Predict classes using the trained model
+y_pred_class = (model.predict(X_test_rnn) > 0.5).astype(int)
+
+# Generate the new confusion matrix
+cm = confusion_matrix(y_test_binary, y_pred_class)
+
+# Plot the confusion matrix
+disp = ConfusionMatrixDisplay(confusion_matrix=cm)
+disp.plot(cmap=plt.cm.Blues)
+plt.title('Confusion Matrix')
+plt.show()
+
+from sklearn.model_selection import TimeSeriesSplit
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout
+from sklearn.metrics import confusion_matrix, roc_curve, auc
+
+# Define the number of splits
+n_splits = 5
+tscv = TimeSeriesSplit(n_splits=n_splits)
+
+# To store metrics for each fold
+confusion_matrices = []
+roc_auc_scores = []
+
+# y_binary is the binarized target for the full dataset; reuse the thresholded labels from above
+y_binary = y_class
+
+for train_index, test_index in tscv.split(X_normalized):
+    X_train_cv, X_test_cv = X_normalized[train_index], X_normalized[test_index]
+    y_train_cv, y_test_cv = y_binary[train_index], y_binary[test_index]
+
+    # Reshape the data for LSTM network
+    X_train_cv_rnn = X_train_cv.reshape((X_train_cv.shape[0], X_train_cv.shape[1], 1))
+    X_test_cv_rnn = X_test_cv.reshape((X_test_cv.shape[0], X_test_cv.shape[1], 1))
+
+    # Define the model (as before)
+    model = Sequential([
+        Bidirectional(LSTM(50), input_shape=(X_train_cv_rnn.shape[1], 1)),
+        Dropout(0.5),
+        Dense(1, activation='sigmoid')
+    ])
+
+    # Compile the model (as before)
+    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+    # Fit the model
+    model.fit(X_train_cv_rnn, y_train_cv, epochs=100, batch_size=32, verbose=0)  # Set verbose to 0 to suppress output
+
+    # Predict probabilities
+    y_pred_probs = model.predict(X_test_cv_rnn).ravel()
+
+    # Binarize predictions based on threshold
+    threshold = 0.5  # This threshold can be adjusted
+    y_pred_class = (y_pred_probs > threshold).astype(int)
+
+    # Calculate metrics for this fold
+    cm = confusion_matrix(y_test_cv, y_pred_class)
+    confusion_matrices.append(cm)
+
+    fpr, tpr, thresholds = roc_curve(y_test_cv, y_pred_probs)
+    roc_auc = auc(fpr, tpr)
+    roc_auc_scores.append(roc_auc)
+
+# Now, you can calculate the average of the metrics across all folds
+# Average Confusion Matrix
+average_cm = np.mean(confusion_matrices, axis=0)
+print("Average Confusion Matrix:\n", average_cm)
+
+# Average ROC AUC Score
+average_roc_auc = np.mean(roc_auc_scores)
+print("Average ROC AUC Score:", average_roc_auc)
+
+import numpy as np
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
+import matplotlib.pyplot as plt
+
+# Assume 'model' is your trained Keras model
+# Predict probabilities for the test set
+y_pred_probs = model.predict(X_test_rnn)
+
+# Function to apply threshold to probabilities to create binary predictions
+def apply_threshold(probs, threshold):
+    return (probs > threshold).astype(int)
+
+# Choose a range of thresholds to try (100 values, one per cell of the 10x10 grid below)
+thresholds = np.linspace(0, 1, 100)
+
+# Plot confusion matrices for various thresholds
+fig, axes = plt.subplots(nrows=10, ncols=10, figsize=(20, 20))  # Adjust the subplot grid as needed
+axes = axes.flatten()  # Flatten to a 1D array for easy iteration
+
+for ax, threshold in zip(axes, thresholds):
+    # Get binary predictions using the current threshold
+    y_pred_class = apply_threshold(y_pred_probs, threshold)
+
+    # Compute the confusion matrix for this threshold
+    cm = confusion_matrix(y_test_binary, y_pred_class)
+
+    # Plot the confusion matrix
+    ConfusionMatrixDisplay(confusion_matrix=cm).plot(cmap=plt.cm.Blues, ax=ax)
+    ax.title.set_text(f'Thr {threshold:.2f}')
+
+plt.tight_layout()  # Adjust spacing
+plt.show()
+
+from sklearn.metrics import roc_curve, auc
+import matplotlib.pyplot as plt
+
+# Assume 'model' is your trained Keras model and you have a test set 'X_test_rnn'
+# Predict probabilities for the positive class (damage)
+y_pred_probs = model.predict(X_test_rnn).ravel()
+
+# Compute ROC curve and ROC area for each class
+fpr, tpr, thresholds = roc_curve(y_test_binary, y_pred_probs)
+roc_auc = auc(fpr, tpr)
+
+# Plot the ROC curve
+plt.figure()
+plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
+plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+plt.xlim([0.0, 1.0])
+plt.ylim([0.0, 1.05])
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('Receiver Operating Characteristic')
+plt.legend(loc="lower right")
+plt.show()
+
+## Using a bidirectional LSTM model
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout
+
+# Define the bidirectional LSTM model
+bidirectional_lstm_model = Sequential([
+    Bidirectional(LSTM(50, return_sequences=True), input_shape=(X_train.shape[1], 1)),
+    Dropout(0.5),
+    Bidirectional(LSTM(50)),
+    Dropout(0.5),
+    Dense(1, activation='sigmoid')
+])
+
+# Compile the model
+bidirectional_lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Train the model
+history = bidirectional_lstm_model.fit(
+    X_train_rnn, y_train_class,
+    epochs=100,
+    validation_split=0.2
+)
+
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc
+import matplotlib.pyplot as plt
+
+# Make predictions on the test data
+y_pred_probs = bidirectional_lstm_model.predict(X_test_rnn)
+y_pred_class = (y_pred_probs > 0.5).astype(int)
+
+# Confusion Matrix
+cm = confusion_matrix(y_test_binary, y_pred_class)
+disp = ConfusionMatrixDisplay(confusion_matrix=cm)
+disp.plot(cmap=plt.cm.Blues)
+plt.title('Confusion Matrix')
+plt.show()
+
+# ROC Curve
+fpr, tpr, thresholds = roc_curve(y_test_binary, y_pred_probs)
+roc_auc = auc(fpr, tpr)
+
+plt.figure()
+plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
+plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+plt.xlim([0.0, 1.0])
+plt.ylim([0.0, 1.05])
+plt.xlabel('False Positive Rate')
+plt.ylabel('True Positive Rate')
+plt.title('Receiver Operating Characteristic')
+plt.legend(loc="lower right")
+plt.show()
+
+from sklearn.model_selection import TimeSeriesSplit
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout
+from sklearn.metrics import confusion_matrix, roc_curve, auc
+
+# Define the number of splits
+n_splits = 5
+tscv = TimeSeriesSplit(n_splits=n_splits)
+
+# To store metrics for each fold
+confusion_matrices = []
+roc_auc_scores = []
+
+for train_index, test_index in tscv.split(X_normalized):
+    X_train_cv, X_test_cv = X_normalized[train_index], X_normalized[test_index]
+    y_train_cv, y_test_cv = y_binary[train_index], y_binary[test_index]
+
+    # Reshape the data for LSTM network
+    X_train_cv_rnn = X_train_cv.reshape((X_train_cv.shape[0], X_train_cv.shape[1], 1))
+    X_test_cv_rnn = X_test_cv.reshape((X_test_cv.shape[0], X_test_cv.shape[1], 1))
+
+    # Define the model (as before)
+    model = Sequential([
+        Bidirectional(LSTM(50), input_shape=(X_train_cv_rnn.shape[1], 1)),
+        Dropout(0.5),
+        Dense(1, activation='sigmoid')
+    ])
+
+    # Compile the model (as before)
+    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+    # Fit the model
+    model.fit(X_train_cv_rnn, y_train_cv, epochs=100, batch_size=32, verbose=0)  # Set verbose to 0 to suppress output
+
+    # Predict probabilities
+    y_pred_probs = model.predict(X_test_cv_rnn).ravel()
+
+    # Binarize predictions based on threshold
+    threshold = 0.5  # This threshold can be adjusted
+    y_pred_class = (y_pred_probs > threshold).astype(int)
+
+    # Calculate metrics for this fold
+    cm = confusion_matrix(y_test_cv, y_pred_class)
+    confusion_matrices.append(cm)
+
+    fpr, tpr, thresholds = roc_curve(y_test_cv, y_pred_probs)
+    roc_auc = auc(fpr, tpr)
+    roc_auc_scores.append(roc_auc)
+
+# Now, you can calculate the average of the metrics across all folds
+# Average Confusion Matrix
+average_cm = np.mean(confusion_matrices, axis=0)
+print("Average Confusion Matrix:\n", average_cm)
+
+# Average ROC AUC Score
+average_roc_auc = np.mean(roc_auc_scores)
+print("Average ROC AUC Score:", average_roc_auc)
+
+
+
+
+
+
+