[85c1ab]: / DNAm_to_CNA.ipynb

Download this file

1 lines (1 with data), 23.0 kB

# %% [markdown]
# # Deep denoising auto-encoder and MLP based multi-output regression on TCGA multi-omics data
# # DNA Methylation to Copy Number Alteration
#
# Predicts copy number alteration (CNA) profiles from DNA methylation profiles
# (TCGA LIHC) with a denoising auto-encoder followed by an MLP regressor, and
# compares the result with standard scikit-learn baselines and a PCA-MLP.

# %% [markdown]
# # Setting environment

# %% [markdown]
# Seeding the random number generators

# %%
import os
import random as rn

# PYTHONHASHSEED is read when an interpreter starts, so assigning it here does
# not change hashing in this already-running kernel; it is only inherited by
# child processes. Export it before launching the kernel for full effect.
os.environ['PYTHONHASHSEED'] = '0'

# Start NumPy-generated random numbers in a well-defined initial state.
import numpy as np
np.random.seed(42)

# Start core-Python-generated random numbers in a well-defined state.
rn.seed(12345)

# Give the TensorFlow backend a well-defined initial random state.
import tensorflow as tf
tf.set_random_seed(1234)

# Force TensorFlow to use a single thread.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
from keras import backend as K
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

# %% [markdown]
# Importing libraries

# %%
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from keras import regularizers
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

# %% [markdown]
# Configuration (change paths and hyper-parameters here)

# %%
DATA_DIR = '/content/drive/My Drive/TCGA Data/Preprocessed_Data'

TEST_SIZE = 0.2
NOISE_FACTOR = 0.5

ENCODING_DIM1 = 500        # width of the first encoded representation
ENCODING_DIM2 = 200        # width of the second (bottleneck) representation
LAMBDA_ACT = 0.0001        # L1 activity regularisation of the encoder layers
LAMBDA_WEIGHT = 0.001      # L2 kernel regularisation of the encoder layers
AE_EPOCHS = 25
MLP_EPOCHS = 50
BATCH_SIZE = 8

PCA_COMPONENTS = 200
LASSO_ALPHAS = [0.01, 0.1, 0.5, 1, 1.5]
RIDGE_ALPHAS = [0.01, 0.1, 0.5, 1, 1.5]
RF_ESTIMATORS = [10, 50, 100, 150, 200]
KNN_NEIGHBORS = [5, 10, 15, 20, 25]
SVR_KERNELS = ['linear', 'poly', 'rbf', 'sigmoid']

# %% [markdown]
# Helper functions shared by all experiments

# %%
def score_predictions(y_true, y_pred):
    """Score a multi-output prediction.

    Returns a tuple ``(mse, r2_of_means)`` where ``mse`` is the mean squared
    error between ``y_true`` and ``y_pred`` and ``r2_of_means`` is the r^2
    score between the per-output means (axis 0) of ``y_true`` and ``y_pred``.
    """
    mse = mean_squared_error(y_true, y_pred)
    true_mean = np.asarray(y_true).mean(axis=0)
    predicted_mean = np.asarray(y_pred).mean(axis=0)
    return mse, r2_score(true_mean, predicted_mean)


def evaluate_grid(make_model, settings, x_fit, y_fit, x_eval, y_eval, label):
    """Fit one model per hyper-parameter value and score each on the eval set.

    ``make_model(setting)`` must return an unfitted estimator exposing
    ``fit`` and ``predict``. Returns two lists parallel to ``settings``:
    the MSE values and the r^2-of-means values (see ``score_predictions``).
    """
    mse_scores, r2_scores = [], []
    for setting in settings:
        print('fitting with', label, '=', setting)
        model = make_model(setting)
        model.fit(x_fit, y_fit)
        mse, r2 = score_predictions(y_eval, model.predict(x_eval))
        mse_scores.append(mse)
        r2_scores.append(r2)
    return mse_scores, r2_scores


def add_mlp_head(features, num_outputs, width):
    """Stack the dropout/dense regression head on ``features``.

    Returns the output tensor of a linear layer with ``num_outputs`` units.
    The hidden layers are 3x and 5x ``width`` units wide.
    """
    h = Dropout(0.2)(features)             # adding 20% dropout
    h = Dense(int(width * 3), activation='relu', name='hidden1')(h)
    h = Dropout(0.5)(h)                    # adding 50% dropout
    h = Dense(int(width * 5), activation='relu', name='hidden2')(h)
    h = Dropout(0.5)(h)                    # adding 50% dropout
    return Dense(num_outputs, activation='linear', name='prediction')(h)

# %% [markdown]
# # Loading data

# %% [markdown]
# Importing data from pre-processed csv files (change `DATA_DIR` accordingly)

# %%
# Colab-only: makes Google Drive visible under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# %%
preprocessed_DNAMeth = pd.read_csv(os.path.join(DATA_DIR, 'LIHC_preprocessed_DNAMeth.csv'))
preprocessed_CNA = pd.read_csv(os.path.join(DATA_DIR, 'LIHC_preprocessed_CNA.csv'))

# %%
x = preprocessed_DNAMeth
y = preprocessed_CNA
# NOTE(review): rows of both files are assumed to describe the same samples
# in the same order - confirm against the preprocessing step.
assert len(x) == len(y), 'methylation and CNA tables must have one row per sample'

# %% [markdown]
# Splitting the data into training and testing datasets

# %%
x_train_raw, x_test_raw, y_train, y_test = train_test_split(x, y, test_size=TEST_SIZE)

# %% [markdown]
# Scaling the data within [0-1] range

# %%
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

# %% [markdown]
# Adding gaussian noise

# %%
x_train_noisy = x_train + NOISE_FACTOR * np.random.normal(0.0, 1.0, x_train.shape)
x_test_noisy = x_test + NOISE_FACTOR * np.random.normal(0.0, 1.0, x_test.shape)

x_train_noisy = np.clip(x_train_noisy, 0., 1.)
x_test_noisy = np.clip(x_test_noisy, 0., 1.)

# %% [markdown]
# # Dimension Reduction/Feature Extraction using DDAE

# %% [markdown]
# Setting the no. of input and output neurons

# %%
num_in_neurons = x.shape[1]
num_out_neurons = y.shape[1]

# %%
# Auto-encoder to extract features from DNA Methylation data.
# No explicit tf.device() scope: TensorFlow uses an available GPU by default,
# and a hard-coded '/gpu:0' scope can fail on a machine without one.

# this is our input placeholder
input_data = Input(shape=(num_in_neurons,))
# first encoded representation of the input
encoded = Dense(ENCODING_DIM1, activation='relu',
                activity_regularizer=regularizers.l1(LAMBDA_ACT),
                kernel_regularizer=regularizers.l2(LAMBDA_WEIGHT),
                name='encoder1')(input_data)
# second encoded representation of the input
encoded = Dense(ENCODING_DIM2, activation='relu',
                activity_regularizer=regularizers.l1(LAMBDA_ACT),
                kernel_regularizer=regularizers.l2(LAMBDA_WEIGHT),
                name='encoder2')(encoded)
# first lossy reconstruction of the input
decoded = Dense(ENCODING_DIM1, activation='relu', name='decoder1')(encoded)
# the final lossy reconstruction of the input
decoded = Dense(num_in_neurons, activation='sigmoid', name='decoder2')(decoded)

# this model maps an input to its reconstruction
autoencoder = Model(inputs=input_data, outputs=decoded)
# this model maps an input to its bottleneck representation
myencoder = Model(inputs=input_data, outputs=encoded)
autoencoder.compile(optimizer='sgd', loss='mse')

# %%
print('training the autoencoder')
# NOTE(review): the test split doubles as validation data here; it is only
# monitored, not used for early stopping or model selection.
autoencoder.fit(x_train_noisy, x_train,
                epochs=AE_EPOCHS,
                batch_size=BATCH_SIZE,
                shuffle=True,
                validation_data=(x_test_noisy, x_test))

ae_train = myencoder.predict(x_train)
ae_test = myencoder.predict(x_test)

# %%
# Freeze the encoder layers themselves before they are reused below.
# The original set `autoencoder.trainable = False`, which flags the
# autoencoder container; layers reused in a new Model may still be trained
# in standalone Keras 2.x - TODO confirm against the installed version.
for layer_name in ('encoder1', 'encoder2'):
    autoencoder.get_layer(layer_name).trainable = False

# %% [markdown]
# # Regression using MLP

# %%
# MLP multi-output regression on top of the frozen encoder.
encoded_features = autoencoder.get_layer('encoder2').output
prediction = add_mlp_head(encoded_features, num_out_neurons, ENCODING_DIM2)
mlpRegressor = Model(inputs=autoencoder.inputs, outputs=prediction)

# Mean absolute error is logged alongside the loss; classification accuracy
# is not meaningful for a continuous target.
mlpRegressor.compile(loss='mse', optimizer='adam', metrics=['mae'])    # or loss='mae'

# %%
print('training the MLP multi-output regressor')
mlpRegressor.fit(x_train, y_train, epochs=MLP_EPOCHS, batch_size=BATCH_SIZE)
y_pred = mlpRegressor.predict(x_test)

# Per-gene averages across the test samples, used for the plots below.
actual_mean = np.asarray(y_test).mean(axis=0)
pred_mean = y_pred.mean(axis=0)

# %% [markdown]
# # Results

# %%
ae_mlp_mse, ae_mlp_r2 = score_predictions(y_test, y_pred)
print('MSE: (Actual Vs. Predicted)', ae_mlp_mse)
print('r^2 value: (Mean of actual Vs. Mean of Predicted)', ae_mlp_r2)

# %% [markdown]
# Plotting actual Vs. predicted CNVs

# %%
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(actual_mean, 'b--', label='Actual')
ax.plot(pred_mean, 'r--', label='Predicted')
ax.set_title('Average of actual and predicted CNVs across all samples')
ax.set_xlabel('No. of features (genes)')
ax.set_ylabel('Average of CNVs across samples')
ax.legend()
ax.grid(True)
plt.show()

# %% [markdown]
# Plotting correlation scatter plot for mean of actual Vs. mean of predicted CNVs

# %%
fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(actual_mean, pred_mean)
ax.set_title('Correlation between mean of actual and mean of predicted CNVs across all samples')
ax.set_xlabel('Average of actual values of CNVs across samples')
ax.set_ylabel('Average of predicted values of CNVs across samples')
ax.grid(True)
plt.show()

# %% [markdown]
# # Comparing regression results with other standard methods
#
# Every baseline is scored with `score_predictions`: MSE on the test split and
# r^2 between the per-gene means of the actual and predicted values.
# The scikit-learn estimators run on the CPU.

# %% [markdown]
# # 1. Linear Regression

# %% [markdown]
# Multi-output regression using Linear Regression (OLS) (sk-learn)

# %%
# NOTE: the `normalize` argument was removed in scikit-learn 1.2; this
# notebook targets the older scikit-learn that ships with the TF1-era stack.
linear_Regr = LinearRegression(normalize=True)
linear_Regr.fit(x_train, y_train)
linear_mse, linear_r2 = score_predictions(y_test, linear_Regr.predict(x_test))

# %%
print("Mean Squared Error (y_test Vs. y_pred): ", linear_mse)
print("r2 Score (y_test_mean Vs. y_pred_mean): ", linear_r2)

# %% [markdown]
# # 2. Lasso

# %% [markdown]
# Multi-output regression using Lasso (sk-learn)

# %%
lasso_mse, lasso_r2 = evaluate_grid(
    lambda alpha: Lasso(alpha=alpha),
    LASSO_ALPHAS, x_train, y_train, x_test, y_test, label='alpha')

# %%
print("Mean Squared Error (y_test Vs. y_pred): ", lasso_mse)
print("r2 Score (y_test_mean Vs. y_pred_mean): ", lasso_r2)

# %% [markdown]
# # 3. Ridge

# %% [markdown]
# Multi-output regression using Ridge (sk-learn)

# %%
ridge_mse, ridge_r2 = evaluate_grid(
    lambda alpha: Ridge(alpha=alpha, normalize=True),
    RIDGE_ALPHAS, x_train, y_train, x_test, y_test, label='alpha')

# %%
print("Mean Squared Error (y_test Vs. y_pred): ", ridge_mse)
print("r2 Score (y_test_mean Vs. y_pred_mean): ", ridge_r2)

# %% [markdown]
# # 4. PCA - Random Forest (PCA-RF)

# %% [markdown]
# PCA projection shared by the PCA-RF, PCA-SVR and PCA-MLP experiments

# %%
# Fit PCA once on the scaled training features and keep the projection in
# separate variables. Overwriting x_train / x_test here would leak the
# projection into later sections and make this cell non-idempotent.
pca = PCA(n_components=PCA_COMPONENTS)
x_train_pca = pca.fit_transform(x_train)
x_test_pca = pca.transform(x_test)

# %% [markdown]
# Multi-output regression using Random Forest (sk-learn)

# %%
rf_mse, rf_r2 = evaluate_grid(
    lambda n_trees: RandomForestRegressor(n_estimators=n_trees, n_jobs=-1),
    RF_ESTIMATORS, x_train_pca, y_train, x_test_pca, y_test, label='n_estimators')

# %%
print("Mean Squared Error: ", rf_mse)
print("r2 Score (y_test_mean Vs. y_pred_mean): ", rf_r2)

# %% [markdown]
# # 5. k-Nearest Neighbor (kNN)

# %% [markdown]
# Multi-output regression using kNN (sk-learn)

# %%
# kNN is listed without a PCA prefix, so it is fit on the scaled features.
# (In the original run order it silently received the PCA projection that the
# PCA-RF cell had written back into x_train / x_test.)
knn_mse, knn_r2 = evaluate_grid(
    lambda k: KNeighborsRegressor(n_neighbors=k, n_jobs=-1),
    KNN_NEIGHBORS, x_train, y_train, x_test, y_test, label='n_neighbors')

# %%
print("Mean Squared Error: ", knn_mse)
print("r2 Score (y_test_mean Vs. y_pred_mean): ", knn_r2)

# %% [markdown]
# # 6. PCA - Support Vector Regression (PCA-SVR)

# %% [markdown]
# Multi-output regression using SVR (sk-learn)

# %%
svr_mse, svr_r2 = evaluate_grid(
    lambda kernel: MultiOutputRegressor(SVR(kernel=kernel, gamma='auto')),
    SVR_KERNELS, x_train_pca, y_train, x_test_pca, y_test, label='kernel')

# %%
print("Mean Squared Error: ", svr_mse)
print("r2 Score (y_test_mean Vs. y_pred_mean): ", svr_r2)

# %% [markdown]
# # Comparison of AE-MLP with PCA-MLP

# %% [markdown]
# PCA-MLP

# %%
# Same regression head as the AE-MLP, fed with the PCA projection instead of
# the auto-encoder bottleneck. Distinct variable names keep the AE-MLP model
# and its predictions available for comparison.
pca_input = Input(shape=(PCA_COMPONENTS,))
pca_prediction = add_mlp_head(pca_input, num_out_neurons, PCA_COMPONENTS)
pca_mlp_regressor = Model(inputs=pca_input, outputs=pca_prediction)
pca_mlp_regressor.compile(loss='mse', optimizer='adam', metrics=['mae'])    # or loss='mae'

# %%
print('training the MLP multi-output regressor')
pca_mlp_regressor.fit(x_train_pca, y_train, epochs=MLP_EPOCHS, batch_size=BATCH_SIZE)
pca_mlp_pred = pca_mlp_regressor.predict(x_test_pca)

# %% [markdown]
# Printing results

# %%
pca_mlp_mse, pca_mlp_r2 = score_predictions(y_test, pca_mlp_pred)
print('MSE: (Actual Vs. Predicted)', pca_mlp_mse)
print('r^2 value: (Mean of actual Vs. Mean of Predicted)', pca_mlp_r2)