#IMPORTING NECESSARY LIBRARIES
import matplotlib.pyplot as plt #import matplotlib
from matplotlib.pyplot import figure #import figure from matplotlib.pyplot
import numpy as np #import numpy
import tensorflow as tf #import tensorflow
import tensorflow_datasets as tfds #import tensorflow_datasets
from tensorflow import keras #import keras
import pandas as pd #import pandas
from scipy.fftpack import irfft, rfft #import rfft and irfft functions
print("importing datasets")
#IMPORTING THE DATASET pure.xlsx
df = pd.read_excel('pure.xlsx',header=None) #import pure dataset
pure_acc = df.to_numpy() #convert to numpy array
#IMPORTING THE DATASET noisy.xlsx
df = pd.read_excel('noisy.xlsx',header=None)#import noisy dataset
noisy_acc = df.to_numpy() #convert to numpy array
#IMPORTING THE DATASET test_pure.xlsx
df = pd.read_excel('test_pure.xlsx',header=None)#import test_pure dataset
test_pure_acc = df.to_numpy()#convert to numpy array
#IMPORTING THE DATASET test_noisy.xlsx
df = pd.read_excel('test_noisy.xlsx',header=None)#import test_noisy dataset
test_noisy_acc = df.to_numpy()#convert to numpy array
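#Sanity check (an added safeguard, not in the original script): every noisy signal
#must pair row-for-row with a pure signal, and likewise for the test sets.
assert pure_acc.shape == noisy_acc.shape #training pairs must line up
assert test_pure_acc.shape == test_noisy_acc.shape #test pairs must line up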
print("RANDOMIZING ORDER OF DATASET TO AVOID BIAS")
#RANDOMIZING ORDER OF DATASET TO AVOID BIAS
n = np.arange(noisy_acc.shape[0]) #index array 0 .. N-1
np.random.shuffle(n) #shuffle the indices
#note: a plain alias (temp = noisy_acc) would not copy the data, so the
#permutation is applied with fancy indexing, which returns new arrays
noisy_acc = noisy_acc[n] #reorder the noisy dataset
pure_acc = pure_acc[n] #apply the same permutation to the pure dataset so pairs stay aligned
print("DATA PREPROCESSING, CONVERTING TRAINING DATA TO FREQUENCY DOMAIN")
#DATA PREPROCESSING, CONVERTING TRAINING DATA TO FREQUENCY DOMAIN
noisy_acc_freq = np.zeros((noisy_acc.shape[0],noisy_acc.shape[1]),np.float64) #create array of zeros of same shape as noisy_acc
for i in range(noisy_acc.shape[0]): #looping over the noisy_acc array
    noisy_acc_freq[i] = rfft(noisy_acc[i]) #converting each signal to the frequency domain
pure_acc_freq = np.zeros((pure_acc.shape[0],pure_acc.shape[1]),np.float64) #create array of zeros of same shape as pure_acc
for i in range(pure_acc.shape[0]): #looping over the pure_acc array
    pure_acc_freq[i] = rfft(pure_acc[i]) #converting each signal to the frequency domain
noisy_acc_freq = noisy_acc_freq.reshape(noisy_acc_freq.shape[0],noisy_acc_freq.shape[1],1)#RESHAPING THE NOISY ACCELEROMETER ARRAY
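#Quick round-trip check (optional): scipy.fftpack's irfft inverts rfft, so a
#transformed signal should reconstruct to within floating-point tolerance.
assert np.allclose(irfft(pure_acc_freq[0]), pure_acc[0]) #rfft/irfft round trip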
print("MODELING THE CNN FOR DENOISING")
#MODELING THE CNN FOR DENOISING
# linear activations are used because this is a pattern-to-pattern mapping, not a classification problem
model1 = keras.Sequential([ #CREATING sequential model
    keras.layers.ZeroPadding1D(padding=3), #zero-padding layer
    keras.layers.Conv1D(16, 7, strides=1, activation='linear'), #convolutional layer, filter size=7, no. of filters=16
    keras.layers.ZeroPadding1D(padding=8), #zero-padding layer
    keras.layers.Conv1D(32, 3, strides=1, activation='linear'), #convolutional layer, filter size=3, no. of filters=32
    keras.layers.Conv1D(32, 3, strides=1, activation='linear'), #convolutional layer, filter size=3, no. of filters=32
    keras.layers.Conv1D(32, 3, strides=1, activation='linear'), #convolutional layer, filter size=3, no. of filters=32
    keras.layers.Conv1D(16, 3, strides=1, activation='linear'), #convolutional layer, filter size=3, no. of filters=16
    keras.layers.Conv1D(16, 3, strides=1, activation='linear'), #convolutional layer, filter size=3, no. of filters=16
    keras.layers.Conv1D(16, 3, strides=1, activation='linear'), #convolutional layer, filter size=3, no. of filters=16
    keras.layers.Flatten(), #flatten layer
    keras.layers.Dense(16, activation='linear'), #fully connected layer, size=16
    keras.layers.Dense(pure_acc_freq.shape[1], activation=None) #fully connected layer, size = length of signal
])
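#Optional sanity check (an addition, not part of the original flow): building the
#model with an explicit input shape lets Keras print a layer-by-layer summary
#before training starts.
model1.build(input_shape=(None, noisy_acc_freq.shape[1], 1)) #(batch, length, channels)
model1.summary() #print layer output shapes and parameter counts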
optim = tf.keras.optimizers.Adam(3e-4) #using adam optimizer, learning rate=3 x 10^-4
model1.compile(optimizer=optim, loss = 'mse',metrics=[tf.keras.metrics.RootMeanSquaredError('rmse')]) #defining loss function and optimizer functions
model1.fit(noisy_acc_freq, pure_acc_freq, epochs=100, batch_size=16) #training model1 for 100 epochs on batch size of 16
print("MODELING THE ANN FOR DENOISING")
#MODELING THE ANN FOR DENOISING
# linear activations are used because this is a pattern-to-pattern mapping, not a classification problem
model2 = keras.Sequential([ #CREATING sequential model
    keras.layers.Flatten(), #flatten layer
    keras.layers.Dense(4096, activation='linear'), #fully connected layer, size=4096
    keras.layers.Dense(8192, activation='linear'), #fully connected layer, size=8192
    keras.layers.Dense(4096, activation='linear'), #fully connected layer, size=4096
    keras.layers.Dense(2048, activation='linear'), #fully connected layer, size=2048
    keras.layers.Dense(pure_acc_freq.shape[1], activation=None) #fully connected layer, size = length of signal
])
optim = tf.keras.optimizers.SGD(1e-3)#using SGD optimizer, learning rate=1 x 10^-3
#Adam's momentum component made training diverge here, so plain SGD is used instead
model2.compile(optimizer=optim, loss = 'mse', metrics=[tf.keras.metrics.RootMeanSquaredError('rmse')])#defining loss function and optimizer functions
model2.fit(noisy_acc_freq, pure_acc_freq, epochs=100, batch_size=12)#training model2 for 100 epochs on batch size 12
print("SAVING THE MODEL")
#SAVING THE MODEL
model1.save('cnn1.h5')#SAVING MODEL1
model2.save('ann1.h5')#SAVING MODEL2
new_model = tf.keras.models.load_model('cnn1.h5') #loading CNN under the variable new_model
print("COMPARING THE PURE, NOISY AND DENOISED SIGNALS USING MATPLOTLIB")
#COMPARING THE PURE, NOISY AND DENOISED SIGNALS USING MATPLOTLIB
z = test_noisy_acc[1] #z takes signal index 1 from the test_noisy dataset
z = rfft(z) #converting z to the frequency domain
z = z.reshape(1,z.shape[0],1) #reshaping z to (1, length, 1) for the model
y_denoised = new_model.predict(z) #denoising z using new_model
y_denoised = irfft(y_denoised) #converting the prediction back to the time domain
figure(num=None, figsize=(10, 6), dpi=80, facecolor='w', edgecolor='k') #defining size and shape of the plot
x = np.linspace(start=0,stop=7,num=701) #creating array= {0,0.01,0.02....6.98,6.99,7}
y_noisy = test_noisy_acc[1] #selecting the noisy signal
plt.plot(x,y_noisy) #plotting the noisy signal
figure(num=None, figsize=(10, 6), dpi=80, facecolor='w', edgecolor='k')#defining size and shape of the plot
plt.plot(x,pure_acc[1]) #plotting PURE SIGNAL
figure(num=None, figsize=(10, 6), dpi=80, facecolor='w', edgecolor='k')#defining size and shape of the plot
plt.plot(x,y_denoised.reshape(701)) #plotting DENOISED SIGNAL
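#When run as a plain script (outside an interactive notebook), the figures only
#appear after an explicit call to plt.show().
plt.show() #display the noisy, pure and denoised plots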
print("CALCULATING IDR BY CALCULATING RELATIVE DISPLACEMENTS BY DOUBLE INTEGRATION:pure")
#NOW THAT OUR ML ALGORITHM IS WORKING FINE, WE WILL MOVE ON TO EVALUATING IT USING [IO,LS,CP]
#CALCULATING IDR VIA RELATIVE DISPLACEMENTS FROM DOUBLE INTEGRATION, THEN BUILDING THE [IO,LS,CP] ARRAY FOR THE FULL PURE TEST SET
time = 1/360 #time taken between 2 readings. Sampling rate = 360Hz
pure_classification = np.zeros((int(test_pure_acc.shape[0]*0.5),3),np.float64) #creating array of zeros for classifying the pure signals
v = np.zeros(test_pure_acc.shape,np.float64) #velocity
disp = np.zeros(test_pure_acc.shape,np.float64) #displacement
floor_height = 2.75 #standard floor height in India = 2.75m
for i in range(0,test_pure_acc.shape[0]): #iterating over the test_pure acceleration signals
    for j in range(1,test_pure_acc.shape[1]): #going over each sample
        v[i][j] = v[i][j-1] + (((test_pure_acc[i][j-1]+test_pure_acc[i][j])/2) * (time)) #trapezoidal integration for velocity
for i in range(0,test_pure_acc.shape[0]): #iterating over the velocities
    for j in range(1,test_pure_acc.shape[1]): #going over each sample
        disp[i][j] = disp[i][j-1] + (((v[i][j-1]+v[i][j])/2) * (time)) #trapezoidal integration for displacement
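#The loops above implement the trapezoidal rule; an equivalent vectorized form
#(a cross-check sketch over the same time step) would be:
# v_alt = np.zeros_like(v)
# v_alt[:, 1:] = np.cumsum((test_pure_acc[:, :-1] + test_pure_acc[:, 1:]) / 2 * time, axis=1)
# assert np.allclose(v, v_alt)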
for i in range(0,disp.shape[0],2): #going over adjacent displacement arrays
    idr = np.zeros(disp.shape[1],np.float64) #creating array of zeros for IDRs
    for j in range(disp.shape[1]): #going over each sample
        idr[j] = ( np.abs(disp[i][j]-disp[i+1][j]) )/(floor_height) #calculating IDRs
    '''
    if idr < 0.007 => Immediate Occupancy
    if 0.007 <= idr <= 0.05 => Life Safety
    if idr > 0.05 => Collapse Prevention
    '''
    scores=np.array([0,0,0]) #scores
    '''
    io_score = index 0
    ls_score = index 1
    cp_score = index 2
    '''
    for k in range(idr.shape[0]): #tallying scores over the array of IDRs
        if idr[k]<0.007: #if IDR < 0.007
            scores[0]+=1 #increment scores[0] by 1
        elif idr[k]>0.05: #if IDR > 0.05
            scores[2]+=1 #increment scores[2] by 1
        else: #if 0.007 <= IDR <= 0.05
            scores[1]+=1 #increment scores[1] by 1
    #the most severe level observed is used to label the pair
    if scores[2]>0: #any Collapse Prevention sample
        scores = [0,0,1]
    elif scores[1]>0: #any Life Safety sample, no Collapse Prevention
        scores = [0,1,0]
    else: #only Immediate Occupancy samples
        scores = [1,0,0]
    pure_classification[int(i/2)]=scores #adding scores to the pure_classification array
    scores=np.array([0,0,0]) #resetting scores array
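#The severity labeling above is repeated verbatim for the noisy and denoised test
#sets below; a helper along these lines (a refactoring sketch, not wired into the
#rest of the script) could collapse the three copies into one:
def classify_idr_pairs(acc, dt=1/360, h=2.75):
    """Label each adjacent pair of signals with a one-hot [IO, LS, CP] vector."""
    v = np.zeros(acc.shape, np.float64) #velocity
    d = np.zeros(acc.shape, np.float64) #displacement
    for i in range(acc.shape[0]):
        for j in range(1, acc.shape[1]):
            v[i][j] = v[i][j-1] + (acc[i][j-1] + acc[i][j]) / 2 * dt #integrate acceleration
    for i in range(acc.shape[0]):
        for j in range(1, acc.shape[1]):
            d[i][j] = d[i][j-1] + (v[i][j-1] + v[i][j]) / 2 * dt #integrate velocity
    labels = np.zeros((acc.shape[0] // 2, 3), np.float64)
    for i in range(0, acc.shape[0], 2):
        idr = np.abs(d[i] - d[i+1]) / h #inter-storey drift ratio per sample
        if np.any(idr > 0.05): #most severe level observed wins
            labels[i // 2] = [0, 0, 1] #Collapse Prevention
        elif np.any(idr >= 0.007):
            labels[i // 2] = [0, 1, 0] #Life Safety
        else:
            labels[i // 2] = [1, 0, 0] #Immediate Occupancy
    return labels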
#FOR NOISY CLASSIFICATION
print("CALCULATING IDR BY CALCULATING RELATIVE DISPLACEMENTS BY DOUBLE INTEGRATION:noisy")
time = 1/360 #time taken between 2 readings. Sampling rate = 360Hz
noisy_classification = np.zeros((int(test_noisy_acc.shape[0]*0.5),3),np.float64) #creating array of zeros for classifying the noisy signals
v = np.zeros(test_noisy_acc.shape,np.float64) #velocity
disp = np.zeros(test_noisy_acc.shape,np.float64) #displacement
for i in range(0,test_noisy_acc.shape[0]): #iterating over the test_noisy acceleration signals
    for j in range(1,test_noisy_acc.shape[1]): #going over each sample
        v[i][j] = v[i][j-1] + (((test_noisy_acc[i][j-1]+test_noisy_acc[i][j])/2) * (time)) #trapezoidal integration for velocity
for i in range(0,test_noisy_acc.shape[0]): #iterating over the velocity signals
    for j in range(1,test_noisy_acc.shape[1]): #going over each sample
        disp[i][j] = disp[i][j-1] + (((v[i][j-1]+v[i][j])/2) * (time)) #trapezoidal integration for displacement
for i in range(0,disp.shape[0],2): #going over adjacent displacement arrays
    idr = np.zeros(disp.shape[1],np.float64) #creating array of zeros for IDRs
    for j in range(disp.shape[1]): #going over each sample
        idr[j] = ( np.abs(disp[i][j]-disp[i+1][j]) )/(floor_height) #calculating IDRs
    '''
    if idr < 0.007 => Immediate Occupancy
    if 0.007 <= idr <= 0.05 => Life Safety
    if idr > 0.05 => Collapse Prevention
    '''
    scores=np.array([0,0,0]) #scores
    '''
    io_score = index 0
    ls_score = index 1
    cp_score = index 2
    '''
    for k in range(idr.shape[0]): #tallying scores over the array of IDRs
        if idr[k]<0.007: #if IDR < 0.007
            scores[0]+=1 #increment scores[0] by 1
        elif idr[k]>0.05: #if IDR > 0.05
            scores[2]+=1 #increment scores[2] by 1
        else: #if 0.007 <= IDR <= 0.05
            scores[1]+=1 #increment scores[1] by 1
    #the most severe level observed is used to label the pair
    if scores[2]>0: #any Collapse Prevention sample
        scores = [0,0,1]
    elif scores[1]>0: #any Life Safety sample, no Collapse Prevention
        scores = [0,1,0]
    else: #only Immediate Occupancy samples
        scores = [1,0,0]
    noisy_classification[int(i/2)]=scores #adding scores to the noisy_classification array
    scores=np.array([0,0,0]) #resetting scores array
#calculating accuracy without denoising.
b=0 #counter for matching classifications
for i in range(pure_classification.shape[0]): #iterating over pure_classification
    if np.array_equal(pure_classification[i], noisy_classification[i]): #if pure and noisy classifications match
        b+=1 #increment b by 1
print("accuracy without denoising = ",end="") #printing text
print(np.float64(b)/pure_classification.shape[0]) #printing accuracy: b divided by the total number of classifications
print("denoising noisy test set")
#DENOISING TEST_NOISY_ACC
test_denoised_acc = np.zeros((test_noisy_acc.shape[0],test_noisy_acc.shape[1]),np.float64) #array of zeros to store the denoised test_noisy signals
for i in range(test_noisy_acc.shape[0]): #iterate over the test_noisy dataset
    z = rfft(test_noisy_acc[i]) #convert to the frequency domain
    z = z.reshape(1,701,1) #reshape the signal for the model
    test_denoised_acc[i] = irfft(new_model.predict(z)) #denoise the signal, convert it back to the time domain and store it
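#A batched alternative (a sketch, assuming scipy.fftpack's axis argument and the
#701-sample signal length used throughout): transform every row at once and
#denoise in a single predict call.
# Z = rfft(test_noisy_acc, axis=1).reshape(-1, 701, 1)
# test_denoised_acc = irfft(new_model.predict(Z), axis=1)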
print("CALCULATING IDR BY CALCULATING RELATIVE DISPLACEMENTS BY DOUBLE INTEGRATION:denoised")
#GETTING CLASSIFICATION ARRAY FOR DENOISED SIGNALS
time = 1/360 #time taken between 2 readings. Sampling rate = 360Hz
denoised_classification = np.zeros((int(test_denoised_acc.shape[0]*0.5),3),np.float64) #creating array of zeros for classifying the denoised signals
v = np.zeros(test_denoised_acc.shape,np.float64) #velocity
disp = np.zeros(test_denoised_acc.shape,np.float64) #displacement
for i in range(0,test_denoised_acc.shape[0]): #iterating over the test_denoised signals
    for j in range(1,test_denoised_acc.shape[1]): #going over each sample
        v[i][j] = v[i][j-1] + (((test_denoised_acc[i][j-1]+test_denoised_acc[i][j])/2) * (time)) #trapezoidal integration for velocity
for i in range(0,test_denoised_acc.shape[0]): #iterating over the velocity signals
    for j in range(1,test_denoised_acc.shape[1]): #going over each sample
        disp[i][j] = disp[i][j-1] + (((v[i][j-1]+v[i][j])/2) * (time)) #trapezoidal integration for displacement
for i in range(0,disp.shape[0],2): #going over adjacent displacement arrays
    idr = np.zeros(disp.shape[1],np.float64) #creating array of zeros for IDRs
    for j in range(disp.shape[1]): #going over each sample
        idr[j] = ( np.abs(disp[i][j]-disp[i+1][j]) )/(floor_height) #calculating IDRs
    '''
    if idr < 0.007 => Immediate Occupancy
    if 0.007 <= idr <= 0.05 => Life Safety
    if idr > 0.05 => Collapse Prevention
    '''
    scores=np.array([0,0,0]) #scores
    '''
    io_score = index 0
    ls_score = index 1
    cp_score = index 2
    '''
    for k in range(idr.shape[0]): #tallying scores over the array of IDRs
        if idr[k]<0.007: #if IDR < 0.007
            scores[0]+=1 #increment scores[0] by 1
        elif idr[k]>0.05: #if IDR > 0.05
            scores[2]+=1 #increment scores[2] by 1
        else: #if 0.007 <= IDR <= 0.05
            scores[1]+=1 #increment scores[1] by 1
    #the most severe level observed is used to label the pair
    if scores[2]>0: #any Collapse Prevention sample
        scores = [0,0,1]
    elif scores[1]>0: #any Life Safety sample, no Collapse Prevention
        scores = [0,1,0]
    else: #only Immediate Occupancy samples
        scores = [1,0,0]
    denoised_classification[int(i/2)]=scores #adding scores to the denoised_classification array
    scores=np.array([0,0,0]) #resetting scores array
#calculating accuracy with denoising.
b=0 #counter for matching classifications
for i in range(pure_classification.shape[0]): #iterating over pure_classification
    if np.array_equal(pure_classification[i], denoised_classification[i]): #if pure and denoised classifications match
        b+=1 #increment b by 1
print("accuracy with denoising = ",end="") #printing text
print(np.float64(b)/pure_classification.shape[0]) #printing accuracy: b divided by the total number of classifications
print("DIRECT CLASSIFICATION USING ACCELEROMETER DATA")
#DIRECT CLASSIFICATION USING ACCELEROMETER DATA
#RELATIVE ACCELERATION CALCULATION
rel_acc_noisy = np.zeros((int(noisy_acc.shape[0]*0.5),noisy_acc.shape[1]),dtype=np.float64) # Creating array to store relative accelerations between noisy data
rel_acc_pure = np.zeros((int(pure_acc.shape[0]*0.5),pure_acc.shape[1]),dtype=np.float64) # Creating array to store relative accelerations between pure data
a=0 #index into the paired signals
for i in range(rel_acc_pure.shape[0]):
    rel_acc_noisy[i] = noisy_acc[a+1] - noisy_acc[a] #relative acceleration between adjacent noisy signals
    rel_acc_pure[i] = pure_acc[a+1] - pure_acc[a] #relative acceleration between adjacent pure signals
    a+=2 #incrementing a by 2 to get the next pair
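#Equivalent vectorized form (a sketch using numpy slicing over the same pairs,
#assuming an even row count):
# rel_acc_noisy = noisy_acc[1::2] - noisy_acc[0::2]
# rel_acc_pure = pure_acc[1::2] - pure_acc[0::2]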
rel_acc_noisy = rel_acc_noisy.reshape(rel_acc_noisy.shape[0],rel_acc_noisy.shape[1],1) #reshaping the array
print("Getting the pure classifications from pure_acc")
#Getting the pure classifications from pure_acc
#For relative acceleration calculation we assume 2 simultaneous accelerometer-time signals
time = 1/360 #time taken between 2 readings. Sampling rate = 360Hz
pure_classification2 = np.zeros((int(pure_acc.shape[0]*0.5),3),np.float64) #creating array of zeros for classifying the pure training signals
v = np.zeros(pure_acc.shape,np.float64) #velocity
disp = np.zeros(pure_acc.shape,np.float64) #displacement
for i in range(0,pure_acc.shape[0]): #iterating over the training pure signals
    for j in range(1,pure_acc.shape[1]): #going over each sample
        v[i][j] = v[i][j-1] + (((pure_acc[i][j-1]+pure_acc[i][j])/2) * (time)) #trapezoidal integration for velocity
for i in range(0,pure_acc.shape[0]): #iterating over velocities
    for j in range(1,pure_acc.shape[1]): #going over each sample (second integration)
        disp[i][j] = disp[i][j-1] + (((v[i][j-1]+v[i][j])/2) * (time)) #trapezoidal integration for displacement
for i in range(0,disp.shape[0],2): #going over adjacent displacement arrays
    idr = np.zeros(disp.shape[1],np.float64) #creating array of zeros for IDRs
    for j in range(disp.shape[1]): #going over each sample
        idr[j] = ( np.abs(disp[i][j]-disp[i+1][j]) )/(floor_height) #calculating IDRs
    '''
    if idr < 0.007 => Immediate Occupancy
    if 0.007 <= idr <= 0.05 => Life Safety
    if idr > 0.05 => Collapse Prevention
    '''
    scores=np.array([0,0,0]) #scores
    '''
    io_score = index 0
    ls_score = index 1
    cp_score = index 2
    '''
    for k in range(idr.shape[0]): #tallying scores over the array of IDRs
        if idr[k]<0.007: #if IDR < 0.007
            scores[0]+=1 #increment scores[0] by 1
        elif idr[k]>0.05: #if IDR > 0.05
            scores[2]+=1 #increment scores[2] by 1
        else: #if 0.007 <= IDR <= 0.05
            scores[1]+=1 #increment scores[1] by 1
    #here the most frequent level (not the most severe, as above) labels the pair
    scores = np.floor(scores/(np.amax(scores))) #1 at the position(s) with the maximum count, 0 elsewhere
    pure_classification2[int(i/2)]=scores #adding scores to the pure_classification2 array
    scores=np.array([0,0,0]) #resetting scores array
print("CNN FOR DIRECT CLASSIFICATION")
#CNN FOR DIRECT CLASSIFICATION
model3 = keras.Sequential([ #sequential model
    keras.layers.Conv1D(32, 32, strides=1, activation='relu'), #convolutional layer, kernel size=32, number of filters=32
    keras.layers.Conv1D(32, 32, strides=1, activation='relu'), #convolutional layer, kernel size=32, number of filters=32
    keras.layers.MaxPool1D(pool_size=8), #maxpool layer, pooling filter size=8
    keras.layers.Flatten(), #flatten layer
    keras.layers.Dense(512, activation='relu'), #fully connected layer, size=512
    keras.layers.Dense(512, activation='relu'), #fully connected layer, size=512
    keras.layers.Dense(3, activation='softmax')]) #fully connected layer, size=3: one output per class
model3.compile(loss=keras.losses.categorical_crossentropy,optimizer=keras.optimizers.SGD(),metrics=['accuracy']) #compiling the model with the SGD optimizer and categorical cross-entropy loss
model3.fit(rel_acc_noisy, pure_classification2, epochs=5, batch_size=16) #training the model: 5 epochs, batch size of 16
print("ANN FOR DIRECT CLASSIFICATION")
#ANN FOR DIRECT CLASSIFICATION
model4 = keras.Sequential([ #sequential model
    keras.layers.Flatten(), #flatten layer
    keras.layers.Dense(512, activation='relu'), #fully connected layer, size=512
    keras.layers.Dense(512, activation='relu'), #fully connected layer, size=512
    keras.layers.Dense(512, activation='relu'), #fully connected layer, size=512
    keras.layers.Dense(3, activation='softmax')]) #fully connected layer, size=3: one output per class
model4.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.SGD(),metrics=['accuracy']) #compiling the model with the SGD optimizer and categorical cross-entropy loss
model4.fit(rel_acc_noisy, pure_classification2, epochs=5, batch_size=4) #training the model: 5 epochs, batch size of 4
print("final accuracy testing on unseen data!")
e = np.zeros((int(test_noisy_acc.shape[0]*0.5),test_noisy_acc.shape[1]),dtype=np.float64) #creating empty array for storing test_noisy dataset's relative accelerations.
#e is the relative acceleration for the test_noisy_acc
a=0 #index into the paired test signals
for i in range(e.shape[0]): #iterating over the test_noisy dataset
    e[i] = test_noisy_acc[a+1] - test_noisy_acc[a] #evaluating relative acceleration and storing it in e
    a+=2 #increment a by 2 to get the next pair
e = e.reshape(e.shape[0],e.shape[1],1) # reshape the array e.
#EVALUATING MODELS
model3.evaluate(e,pure_classification) # evaluating the performance of the CNN on unseen data
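#The ANN can be scored the same way (an added line mirroring the CNN evaluation
#above, since both models were trained on the same targets).
model4.evaluate(e,pure_classification) #evaluating the performance of the ANN on unseen data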