--- a +++ b/src/ckpt_QML.ipynb @@ -0,0 +1,254 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Quantum machine learning on lower-dimensional single-cell RNAseq data\n", + "\n", + "\n", + "This notebook evaluates the following quantum machine learning models:\n", + "\n", + "* Quantum Support Vector Machine (QSVC) https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.QSVC.html\n", + "* Pegasos QSVC: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.PegasosQSVC.html\n", + "* Neural Networks: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.NeuralNetworkClassifier.html\n", + "* Variational Quantum Classifier (VQC): https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.VQC.html\n", + "\n", + "\n", + "It takes as input the lower dimensional embedding of the single-cell RNAseq data with eight dimension of the melanoma minimal residual diseases sample and predicts drug-administered melanoma v/s phase II of minimal residual disease. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ====== Base class imports ======\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from glob import glob\n", + "import matplotlib\n", + "import os\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns \n", + "sns.set_style('dark')\n", + "\n", + "# ====== Scikit-learn imports ======\n", + "\n", + "from sklearn.svm import SVC\n", + "from sklearn.metrics import (\n", + " auc,\n", + " roc_curve,\n", + " ConfusionMatrixDisplay,\n", + " f1_score,\n", + " balanced_accuracy_score,\n", + ")\n", + "from sklearn.preprocessing import StandardScaler, LabelBinarizer\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# ====== Qiskit imports ======\n", + "\n", + "from qiskit import QuantumCircuit\n", + "from qiskit.circuit import Parameter\n", + "from qiskit.circuit.library import RealAmplitudes, ZZFeatureMap\n", + "from qiskit_algorithms.optimizers import COBYLA, L_BFGS_B\n", + "from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier, VQC\n", + "from qiskit_machine_learning.neural_networks import SamplerQNN, EstimatorQNN\n", + "from qiskit_machine_learning.circuit.library import QNNCircuit\n", + "from qiskit.circuit.library import ZZFeatureMap, ZFeatureMap, PauliFeatureMap\n", + "from qiskit_aer import AerSimulator\n", + "from qiskit_ibm_runtime import QiskitRuntimeService\n", + "from qiskit_algorithms.utils import algorithm_globals\n", + "from qiskit.primitives import Sampler\n", + "from qiskit_ibm_runtime.fake_provider import FakeManilaV2\n", + "from qiskit_algorithms.state_fidelities import ComputeUncompute\n", + "from qiskit_machine_learning.kernels import FidelityQuantumKernel\n", + "from qiskit_machine_learning.algorithms import QSVC, PegasosQSVC\n", + "from qiskit.transpiler.preset_passmanagers import generate_preset_pass_manager\n", + "\n", + "\n", + "## ====== Torch imports ======\n", + "import torch\n", + "import torch.nn as nn \n", + "import pytorch_lightning as pl \n", + "from torchmetrics.classification import F1Score\n", + "import torch.optim as optim\n", + "from lightning.pytorch.utilities.types import OptimizerLRScheduler\n", + "import torch.utils.data\n", + "from pytorch_lightning.loggers import TensorBoardLogger\n", + "import lightning, lightning.pytorch.loggers\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# load checkpoint\n", + "ckpt_path = '/dccstor/boseukb/Q/ML/checkpoints/GSE116237_forQ_iter'\n", + "fname = os.path.basename(ckpt_path)\n", + "all_checkpoints = []\n", + "for fname in glob('/dccstor/boseukb/Q/ML/checkpoints/GSE116237_forQ_iter*/**/*.ckpt', recursive=True):\n", + " all_checkpoints.append(fname)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def compute_svc(X_train, y_train, X_test, y_test, c = 1):\n", + " svc = SVC(C=c)\n", + " # y_train = torch.argmax(torch.tensor(y_train, dtype=torch.float32),dim=1)\n", + " # y_test = torch.argmax(torch.tensor(y_test, dtype=torch.float32),dim=1)\n", + " svc_vanilla = svc.fit(X_train, y_train)\n", + " labels_vanilla = svc_vanilla.predict(X_test)\n", + " f1_svc = f1_score(y_test, labels_vanilla, average='micro')\n", + " \n", + " return f1_svc\n", + " \n", + "def compute_QSVC(X_train, y_train, X_test, y_test, encoding='ZZ', c = 1, pegasos=False):\n", + " \n", + " #service = QiskitRuntimeService(instance=\"accelerated-disc/internal/default\") \n", + " service = QiskitRuntimeService() \n", + " backend = AerSimulator(method='statevector')\n", + " algorithm_globals.random_seed = 12345\n", + "\n", + " feature_map = None\n", + "\n", + " if encoding == 'ZZ' :\n", + " feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], \n", + " reps=2, \n", + " entanglement='linear')\n", + " else: \n", + " if encoding == 'Z': \n", + " feature_map = ZFeatureMap(feature_dimension=X_train.shape[1], \n", + " reps=2)\n", + " if encoding == 'P': \n", + " feature_map = PauliFeatureMap(feature_dimension=X_train.shape[1], \n", + " reps=2, entanglement='linear')\n", + "\n", + " sampler = Sampler() \n", + " fidelity = ComputeUncompute(sampler=sampler)\n", + " Qkernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=feature_map)\n", + " f1_qsvc = QSVC(quantum_kernel=Qkernel, C=c)\n", + " \n", + " f1_peg_qsvc = 0\n", + " if pegasos == True: \n", + " peg_qsvc = PegasosQSVC(quantum_kernel=Qkernel, C=c)\n", + " peg_qsvc_model = peg_qsvc.fit(X_train, y_train)\n", + " labels_peg_qsvc = peg_qsvc_model.predict(X_test)\n", + " f1_peg_qsvc = f1_score(y_test, labels_peg_qsvc, average='micro')\n", + "\n", + " return f1_qsvc,f1_peg_qsvc\n", + "\n", + "def compute_estimator_QNN(X_train, y_train, X_test, y_test, primitive: str):\n", + " \n", + " if primitive == 'estimator':\n", + " # construct QNN with the QNNCircuit's default ZZFeatureMap feature map and RealAmplitudes ansatz.\n", + " qc_qnn = QNNCircuit(num_qubits=X_train.shape[1])\n", + "\n", + " estimator_qnn = EstimatorQNN(circuit=qc_qnn)\n", + " # QNN maps inputs to [-1, +1]\n", + " estimator_qnn.forward(X_train[0, :], algorithm_globals.random.random(estimator_qnn.num_weights))\n", + " # construct neural network classifier\n", + " estimator_classifier = NeuralNetworkClassifier(estimator_qnn, optimizer=COBYLA(maxiter=100))\n", + " # fit classifier to data\n", + " estimator_classifier.fit(X_train, y_train)\n", + " f1_score_estimator_qnn = estimator_classifier.score(X_test, y_test)\n", + " return f1_score_estimator_qnn\n", + " \n", + " if primitive == 'sampler':\n", + " # construct a quantum circuit from the default ZZFeatureMap feature map and a customized RealAmplitudes ansatz\n", + " qc_sampler = QNNCircuit(ansatz=RealAmplitudes(X_train.shape[1], reps=1))\n", + " # parity maps bitstrings to 0 or 1\n", + " def parity(x):\n", + " return \"{:b}\".format(x).count(\"1\") % 2\n", + " output_shape = 2 # corresponds to the number of classes, possible outcomes of the (parity) mapping\n", + " # construct QNN\n", + " sampler_qnn = SamplerQNN(circuit=qc_sampler, interpret=parity,output_shape=output_shape,)\n", + " # construct classifier\n", + " sampler_classifier = NeuralNetworkClassifier(neural_network=sampler_qnn, optimizer=COBYLA(maxiter=100))\n", + " # fit classifier to data\n", + " sampler_classifier.fit(X_train, y_train)\n", + " f1_score_sampler_qnn = sampler_classifier.score(X_test, y_test)\n", + " return f1_score_sampler_qnn" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.01\n", + "0.01\n" + ] + } + ], + "source": [ + "results_dict = {}\n", + "for iter in range(25):\n", + " matches = [x for x in all_checkpoints if \"iter\"+str(iter)+\"_\" in x]\n", + " #iter_num = os.path.basename(all_checkpoints[0]).split('_')[2]\\n\",\n", + " x_train = np.load([x for x in matches if \"train_embedding\" in x][0])\n", + " x_test = np.load([x for x in matches if \"test_embedding\" in x][0])\n", + " y_train = np.load([x for x in matches if \"train_target\" in x][0])\n", + " y_test = np.load([x for x in matches if \"test_target\" in x][0])\n", + "\n", + " f1_svc = compute_svc(x_train,\n", + " y_train,\n", + " x_test,\n", + " y_test,\n", + " c=10)\n", + " \n", + " f1_qsvc, f1_peg_qsvc = compute_QSVC(x_train, \n", + " y_train, \n", + " x_test,\n", + " y_test,\n", + " c=10,\n", + " pegasos=1,\n", + " )\n", + " \n", + " f1_qsvc= compute_QNN(x_train, \n", + " y_train, \n", + " x_test,\n", + " y_test,\n", + " c=10,\n", + " pegasos=1,\n", + " )\n", + "results_dict[iter] = [f1_svc, f1_qsvc, f1_peg_qsvc]\n", + "df = pd.DataFrame.from_dict(results_dict, orient='index')\n", + "df.to_csv('/dccstor/boseukb/Q/ML/v2/results_comparison.csv', index=False, header=['SVC', 'QSVC', 'PEGQSVC'])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}