255 lines (254 with data), 11.0 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Quantum machine learning on lower-dimensional single-cell RNAseq data\n",
"\n",
"\n",
"This notebook evaluates the following quantum machine learning models:\n",
"\n",
"* Quantum Support Vector Machine (QSVC) https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.QSVC.html\n",
"* Pegasos QSVC: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.PegasosQSVC.html\n",
"* Neural Networks: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.NeuralNetworkClassifier.html\n",
"* Variational Quantum Classifier (VQC): https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.VQC.html\n",
"\n",
"\n",
"It takes as input the lower-dimensional (eight-dimensional) embedding of the single-cell RNA-seq data from the melanoma minimal residual disease samples and predicts drug-administered melanoma vs. phase II of minimal residual disease."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ====== Base class imports ======\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from glob import glob\n",
"import matplotlib\n",
"import os\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns \n",
"sns.set_style('dark')\n",
"\n",
"# ====== Scikit-learn imports ======\n",
"\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import (\n",
" auc,\n",
" roc_curve,\n",
" ConfusionMatrixDisplay,\n",
" f1_score,\n",
" balanced_accuracy_score,\n",
")\n",
"from sklearn.preprocessing import StandardScaler, LabelBinarizer\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# ====== Qiskit imports ======\n",
"\n",
"from qiskit import QuantumCircuit\n",
"from qiskit.circuit import Parameter\n",
"from qiskit.circuit.library import RealAmplitudes, ZZFeatureMap\n",
"from qiskit_algorithms.optimizers import COBYLA, L_BFGS_B\n",
"from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier, VQC\n",
"from qiskit_machine_learning.neural_networks import SamplerQNN, EstimatorQNN\n",
"from qiskit_machine_learning.circuit.library import QNNCircuit\n",
"from qiskit.circuit.library import ZZFeatureMap, ZFeatureMap, PauliFeatureMap\n",
"from qiskit_aer import AerSimulator\n",
"from qiskit_ibm_runtime import QiskitRuntimeService\n",
"from qiskit_algorithms.utils import algorithm_globals\n",
"from qiskit.primitives import Sampler\n",
"from qiskit_ibm_runtime.fake_provider import FakeManilaV2\n",
"from qiskit_algorithms.state_fidelities import ComputeUncompute\n",
"from qiskit_machine_learning.kernels import FidelityQuantumKernel\n",
"from qiskit_machine_learning.algorithms import QSVC, PegasosQSVC\n",
"from qiskit.transpiler.preset_passmanagers import generate_preset_pass_manager\n",
"\n",
"\n",
"## ====== Torch imports ======\n",
"import torch\n",
"import torch.nn as nn \n",
"import pytorch_lightning as pl \n",
"from torchmetrics.classification import F1Score\n",
"import torch.optim as optim\n",
"from lightning.pytorch.utilities.types import OptimizerLRScheduler\n",
"import torch.utils.data\n",
"from pytorch_lightning.loggers import TensorBoardLogger\n",
"import lightning, lightning.pytorch.loggers\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Collect every checkpoint file found under the GSE116237_forQ_iter* directories.\n",
"# `ckpt_path` is a directory-name prefix: the trailing `*` in the pattern matches the\n",
"# per-iteration suffix and `**` (with recursive=True) descends into nested sub-directories.\n",
"ckpt_path = '/dccstor/boseukb/Q/ML/checkpoints/GSE116237_forQ_iter'\n",
"\n",
"# glob() returns paths in arbitrary order; sort so that later `[0]` selections are reproducible.\n",
"all_checkpoints = sorted(glob(ckpt_path + '*/**/*.ckpt', recursive=True))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def compute_svc(X_train, y_train, X_test, y_test, c=1):\n",
"    \"\"\"Fit a classical scikit-learn SVC and score it on the held-out split.\n",
"\n",
"    Args:\n",
"        X_train, y_train: features and labels used to fit the classifier.\n",
"        X_test, y_test: features and labels used for evaluation.\n",
"        c: value forwarded to ``SVC`` as its ``C`` argument.\n",
"\n",
"    Returns:\n",
"        Micro-averaged F1 score of the predictions on ``X_test``.\n",
"    \"\"\"\n",
"    classifier = SVC(C=c).fit(X_train, y_train)\n",
"    predictions = classifier.predict(X_test)\n",
"    return f1_score(y_test, predictions, average='micro')\n",
" \n",
"def compute_QSVC(X_train, y_train, X_test, y_test, encoding='ZZ', c=1, pegasos=False):\n",
"    \"\"\"Train a quantum-kernel SVC (and optionally a Pegasos QSVC) and score both on the test split.\n",
"\n",
"    Args:\n",
"        X_train, y_train: training features and labels; ``X_train.shape[1]`` sets the\n",
"            feature-map dimension.\n",
"        X_test, y_test: features and labels used for evaluation.\n",
"        encoding: ``'ZZ'``, ``'Z'`` or ``'P'``, selecting ``ZZFeatureMap``, ``ZFeatureMap``\n",
"            or ``PauliFeatureMap`` (all built with ``reps=2``).\n",
"        c: value forwarded to both classifiers as their ``C`` argument.\n",
"        pegasos: when truthy, a ``PegasosQSVC`` is also fitted and scored.\n",
"\n",
"    Returns:\n",
"        Tuple ``(f1_qsvc, f1_peg_qsvc)`` of micro-averaged F1 scores on the test split.\n",
"        ``f1_peg_qsvc`` is ``0`` when ``pegasos`` is falsy.\n",
"\n",
"    Raises:\n",
"        ValueError: if ``encoding`` is not one of the supported codes.\n",
"    \"\"\"\n",
"    # Side effect: fixes the global qiskit-algorithms seed for everything that runs afterwards.\n",
"    algorithm_globals.random_seed = 12345\n",
"\n",
"    n_features = X_train.shape[1]\n",
"    if encoding == 'ZZ':\n",
"        feature_map = ZZFeatureMap(feature_dimension=n_features, reps=2, entanglement='linear')\n",
"    elif encoding == 'Z':\n",
"        feature_map = ZFeatureMap(feature_dimension=n_features, reps=2)\n",
"    elif encoding == 'P':\n",
"        feature_map = PauliFeatureMap(feature_dimension=n_features, reps=2, entanglement='linear')\n",
"    else:\n",
"        # Fail loudly instead of passing feature_map=None to the kernel.\n",
"        raise ValueError(f\"Unknown encoding {encoding!r}; expected 'ZZ', 'Z' or 'P'.\")\n",
"\n",
"    # The kernel is evaluated with the local reference Sampler, so no IBM Runtime\n",
"    # service or backend object is created here.\n",
"    fidelity = ComputeUncompute(sampler=Sampler())\n",
"    Qkernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=feature_map)\n",
"\n",
"    # Fit and score the QSVC itself, so a score (not the estimator object) is returned.\n",
"    qsvc = QSVC(quantum_kernel=Qkernel, C=c)\n",
"    qsvc.fit(X_train, y_train)\n",
"    f1_qsvc = f1_score(y_test, qsvc.predict(X_test), average='micro')\n",
"\n",
"    f1_peg_qsvc = 0\n",
"    if pegasos:\n",
"        peg_qsvc = PegasosQSVC(quantum_kernel=Qkernel, C=c)\n",
"        peg_qsvc_model = peg_qsvc.fit(X_train, y_train)\n",
"        f1_peg_qsvc = f1_score(y_test, peg_qsvc_model.predict(X_test), average='micro')\n",
"\n",
"    return f1_qsvc, f1_peg_qsvc\n",
"\n",
"def compute_estimator_QNN(X_train, y_train, X_test, y_test, primitive: str):\n",
"    \"\"\"Train a quantum neural-network classifier and return its score on the test split.\n",
"\n",
"    Args:\n",
"        X_train, y_train: training features and labels; ``X_train.shape[1]`` sets the\n",
"            number of qubits.\n",
"        X_test, y_test: features and labels used for evaluation.\n",
"        primitive: ``'estimator'`` builds an ``EstimatorQNN`` on the default ``QNNCircuit``;\n",
"            ``'sampler'`` builds a ``SamplerQNN`` with a ``RealAmplitudes(reps=1)`` ansatz\n",
"            whose measured bitstrings are mapped to two classes by parity.\n",
"\n",
"    Returns:\n",
"        The value of ``NeuralNetworkClassifier.score(X_test, y_test)``.\n",
"        NOTE(review): this is presumably scikit-learn's mean accuracy rather than an\n",
"        F1 computed via ``f1_score`` -- confirm it is comparable with the other models.\n",
"\n",
"    Raises:\n",
"        ValueError: if ``primitive`` is neither ``'estimator'`` nor ``'sampler'``.\n",
"    \"\"\"\n",
"    n_qubits = X_train.shape[1]\n",
"\n",
"    if primitive == 'estimator':\n",
"        # QNNCircuit defaults to a ZZFeatureMap feature map and a RealAmplitudes ansatz.\n",
"        # NOTE(review): the EstimatorQNN maps inputs to [-1, +1]; confirm that y_train / y_test\n",
"        # are encoded as -1/+1 rather than 0/1 before trusting this score.\n",
"        qnn = EstimatorQNN(circuit=QNNCircuit(num_qubits=n_qubits))\n",
"    elif primitive == 'sampler':\n",
"        def parity(x):\n",
"            # Map a measured bitstring (as an integer) to 0 or 1 by the parity of its set bits.\n",
"            return bin(x).count('1') % 2\n",
"\n",
"        qnn = SamplerQNN(\n",
"            circuit=QNNCircuit(ansatz=RealAmplitudes(n_qubits, reps=1)),\n",
"            interpret=parity,\n",
"            output_shape=2,  # number of classes, i.e. possible outcomes of the parity mapping\n",
"        )\n",
"    else:\n",
"        # Fail loudly instead of silently returning None.\n",
"        raise ValueError(f\"Unknown primitive {primitive!r}; expected 'estimator' or 'sampler'.\")\n",
"\n",
"    classifier = NeuralNetworkClassifier(neural_network=qnn, optimizer=COBYLA(maxiter=100))\n",
"    classifier.fit(X_train, y_train)\n",
"    return classifier.score(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.01\n",
"0.01\n"
]
}
],
"source": [
"RESULT_COLUMNS = ['SVC', 'QSVC', 'PEGQSVC', 'EstimatorQNN', 'SamplerQNN']\n",
"N_ITERATIONS = 25  # iterations 0..24, matched against 'iter<k>_' in the checkpoint paths\n",
"\n",
"\n",
"def _load_split(paths, tag):\n",
"    \"\"\"Load with ``np.load`` the first path in ``paths`` that contains ``tag``.\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: if no path contains ``tag``.\n",
"    \"\"\"\n",
"    hits = [p for p in paths if tag in p]\n",
"    if not hits:\n",
"        raise FileNotFoundError(f'No path containing {tag!r} among {len(paths)} candidate file(s).')\n",
"    return np.load(hits[0])\n",
"\n",
"\n",
"# One row of scores per iteration, keyed by the iteration number.\n",
"results_dict = {}\n",
"for iteration in range(N_ITERATIONS):\n",
"    # The trailing underscore keeps e.g. 'iter1_' from also matching 'iter11_'.\n",
"    matches = [p for p in all_checkpoints if f'iter{iteration}_' in p]\n",
"    x_train = _load_split(matches, 'train_embedding')\n",
"    x_test = _load_split(matches, 'test_embedding')\n",
"    y_train = _load_split(matches, 'train_target')\n",
"    y_test = _load_split(matches, 'test_target')\n",
"\n",
"    f1_svc = compute_svc(x_train, y_train, x_test, y_test, c=10)\n",
"\n",
"    f1_qsvc, f1_peg_qsvc = compute_QSVC(x_train, y_train, x_test, y_test, c=10, pegasos=True)\n",
"\n",
"    # The QNN scores get their own variables so they do not overwrite the QSVC score.\n",
"    score_estimator_qnn = compute_estimator_QNN(x_train, y_train, x_test, y_test, primitive='estimator')\n",
"    score_sampler_qnn = compute_estimator_QNN(x_train, y_train, x_test, y_test, primitive='sampler')\n",
"\n",
"    # Recorded inside the loop so every iteration contributes a row, not only the last one.\n",
"    results_dict[iteration] = [f1_svc, f1_qsvc, f1_peg_qsvc, score_estimator_qnn, score_sampler_qnn]\n",
"\n",
"df = pd.DataFrame.from_dict(results_dict, orient='index')\n",
"df.to_csv('/dccstor/boseukb/Q/ML/v2/results_comparison.csv', index=False, header=RESULT_COLUMNS)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}