Diff of /src/ckpt_QML.ipynb [000000] .. [b798eb]

Switch to unified view

a b/src/ckpt_QML.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "metadata": {},
6
   "source": [
7
    "## Quantum machine learning on lower-dimensional single-cell RNAseq data\n",
8
    "\n",
9
    "\n",
10
    "This notebook evaluates the following quantum machine learning models:\n",
11
    "\n",
12
    "* Quantum Support Vector Machine (QSVC) https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.QSVC.html\n",
13
    "* Pegasos QSVC: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.PegasosQSVC.html\n",
14
    "* Neural Networks: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.NeuralNetworkClassifier.html\n",
15
    "* Variational Quantum Classifier (VQC): https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.VQC.html\n",
16
    "\n",
17
    "\n",
18
    "It takes as input the lower dimensional embedding of the single-cell RNAseq data with eight dimension of the melanoma minimal residual diseases sample and predicts drug-administered melanoma v/s phase II of minimal residual disease. "
19
   ]
20
  },
21
  {
22
   "cell_type": "code",
23
   "execution_count": null,
24
   "metadata": {},
25
   "outputs": [],
26
   "source": [
27
    "# ====== Base class imports ======\n",
28
    "\n",
29
    "import numpy as np\n",
30
    "import pandas as pd\n",
31
    "from glob import glob\n",
32
    "import matplotlib\n",
33
    "import os\n",
34
    "import matplotlib.pyplot as plt\n",
35
    "import seaborn as sns \n",
36
    "sns.set_style('dark')\n",
37
    "\n",
38
    "# ====== Scikit-learn imports ======\n",
39
    "\n",
40
    "from sklearn.svm import SVC\n",
41
    "from sklearn.metrics import (\n",
42
    "    auc,\n",
43
    "    roc_curve,\n",
44
    "    ConfusionMatrixDisplay,\n",
45
    "    f1_score,\n",
46
    "    balanced_accuracy_score,\n",
47
    ")\n",
48
    "from sklearn.preprocessing import StandardScaler, LabelBinarizer\n",
49
    "from sklearn.model_selection import train_test_split\n",
50
    "\n",
51
    "# ====== Qiskit imports ======\n",
52
    "\n",
53
    "from qiskit import QuantumCircuit\n",
54
    "from qiskit.circuit import Parameter\n",
55
    "from qiskit.circuit.library import RealAmplitudes, ZZFeatureMap\n",
56
    "from qiskit_algorithms.optimizers import COBYLA, L_BFGS_B\n",
57
    "from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier, VQC\n",
58
    "from qiskit_machine_learning.neural_networks import SamplerQNN, EstimatorQNN\n",
59
    "from qiskit_machine_learning.circuit.library import QNNCircuit\n",
60
    "from qiskit.circuit.library import ZZFeatureMap, ZFeatureMap, PauliFeatureMap\n",
61
    "from qiskit_aer import AerSimulator\n",
62
    "from qiskit_ibm_runtime import QiskitRuntimeService\n",
63
    "from qiskit_algorithms.utils import algorithm_globals\n",
64
    "from qiskit.primitives import Sampler\n",
65
    "from qiskit_ibm_runtime.fake_provider import FakeManilaV2\n",
66
    "from qiskit_algorithms.state_fidelities import ComputeUncompute\n",
67
    "from qiskit_machine_learning.kernels import FidelityQuantumKernel\n",
68
    "from qiskit_machine_learning.algorithms import QSVC, PegasosQSVC\n",
69
    "from qiskit.transpiler.preset_passmanagers import generate_preset_pass_manager\n",
70
    "\n",
71
    "\n",
72
    "## ====== Torch imports ======\n",
73
    "import torch\n",
74
    "import torch.nn as nn \n",
75
    "import pytorch_lightning as pl \n",
76
    "from torchmetrics.classification import F1Score\n",
77
    "import torch.optim as optim\n",
78
    "from lightning.pytorch.utilities.types import OptimizerLRScheduler\n",
79
    "import torch.utils.data\n",
80
    "from pytorch_lightning.loggers import TensorBoardLogger\n",
81
    "import lightning, lightning.pytorch.loggers\n"
82
   ]
83
  },
84
  {
85
   "cell_type": "code",
86
   "execution_count": 2,
87
   "metadata": {},
88
   "outputs": [],
89
   "source": [
90
    "# load checkpoint\n",
91
    "ckpt_path = '/dccstor/boseukb/Q/ML/checkpoints/GSE116237_forQ_iter'\n",
92
    "fname = os.path.basename(ckpt_path)\n",
93
    "all_checkpoints = []\n",
94
    "for fname in glob('/dccstor/boseukb/Q/ML/checkpoints/GSE116237_forQ_iter*/**/*.ckpt', recursive=True):\n",
95
    "    all_checkpoints.append(fname)"
96
   ]
97
  },
98
  {
99
   "cell_type": "code",
100
   "execution_count": 4,
101
   "metadata": {},
102
   "outputs": [],
103
   "source": [
104
    "def compute_svc(X_train, y_train, X_test, y_test, c = 1):\n",
105
    "    svc = SVC(C=c)\n",
106
    "    # y_train = torch.argmax(torch.tensor(y_train, dtype=torch.float32),dim=1)\n",
107
    "    # y_test = torch.argmax(torch.tensor(y_test, dtype=torch.float32),dim=1)\n",
108
    "    svc_vanilla = svc.fit(X_train, y_train)\n",
109
    "    labels_vanilla = svc_vanilla.predict(X_test)\n",
110
    "    f1_svc = f1_score(y_test, labels_vanilla, average='micro')\n",
111
    "    \n",
112
    "    return f1_svc\n",
113
    "    \n",
114
    "def compute_QSVC(X_train, y_train, X_test, y_test, encoding='ZZ', c = 1, pegasos=False):\n",
115
    "    \n",
116
    "    #service = QiskitRuntimeService(instance=\"accelerated-disc/internal/default\")    \n",
117
    "    service = QiskitRuntimeService()    \n",
118
    "    backend = AerSimulator(method='statevector')\n",
119
    "    algorithm_globals.random_seed = 12345\n",
120
    "\n",
121
    "    feature_map = None\n",
122
    "\n",
123
    "    if encoding == 'ZZ' :\n",
124
    "        feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], \n",
125
    "                            reps=2, \n",
126
    "                            entanglement='linear')\n",
127
    "    else: \n",
128
    "        if encoding == 'Z': \n",
129
    "            feature_map = ZFeatureMap(feature_dimension=X_train.shape[1], \n",
130
    "                            reps=2)\n",
131
    "        if encoding == 'P': \n",
132
    "            feature_map = PauliFeatureMap(feature_dimension=X_train.shape[1], \n",
133
    "                            reps=2, entanglement='linear')\n",
134
    "\n",
135
    "    sampler = Sampler() \n",
136
    "    fidelity = ComputeUncompute(sampler=sampler)\n",
137
    "    Qkernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=feature_map)\n",
138
    "    f1_qsvc = QSVC(quantum_kernel=Qkernel, C=c)\n",
139
    "    \n",
140
    "    f1_peg_qsvc = 0\n",
141
    "    if pegasos == True: \n",
142
    "        peg_qsvc = PegasosQSVC(quantum_kernel=Qkernel, C=c)\n",
143
    "        peg_qsvc_model = peg_qsvc.fit(X_train, y_train)\n",
144
    "        labels_peg_qsvc = peg_qsvc_model.predict(X_test)\n",
145
    "        f1_peg_qsvc = f1_score(y_test, labels_peg_qsvc, average='micro')\n",
146
    "\n",
147
    "    return f1_qsvc,f1_peg_qsvc\n",
148
    "\n",
149
    "def compute_estimator_QNN(X_train, y_train, X_test, y_test, primitive: str):\n",
150
    "    \n",
151
    "    if primitive == 'estimator':\n",
152
    "        # construct QNN with the QNNCircuit's default ZZFeatureMap feature map and RealAmplitudes ansatz.\n",
153
    "        qc_qnn = QNNCircuit(num_qubits=X_train.shape[1])\n",
154
    "\n",
155
    "        estimator_qnn = EstimatorQNN(circuit=qc_qnn)\n",
156
    "        # QNN maps inputs to [-1, +1]\n",
157
    "        estimator_qnn.forward(X_train[0, :], algorithm_globals.random.random(estimator_qnn.num_weights))\n",
158
    "        # construct neural network classifier\n",
159
    "        estimator_classifier = NeuralNetworkClassifier(estimator_qnn, optimizer=COBYLA(maxiter=100))\n",
160
    "        # fit classifier to data\n",
161
    "        estimator_classifier.fit(X_train, y_train)\n",
162
    "        f1_score_estimator_qnn = estimator_classifier.score(X_test, y_test)\n",
163
    "        return f1_score_estimator_qnn\n",
164
    "    \n",
165
    "    if primitive == 'sampler':\n",
166
    "        # construct a quantum circuit from the default ZZFeatureMap feature map and a customized RealAmplitudes ansatz\n",
167
    "        qc_sampler = QNNCircuit(ansatz=RealAmplitudes(X_train.shape[1], reps=1))\n",
168
    "        # parity maps bitstrings to 0 or 1\n",
169
    "        def parity(x):\n",
170
    "            return \"{:b}\".format(x).count(\"1\") % 2\n",
171
    "        output_shape = 2  # corresponds to the number of classes, possible outcomes of the (parity) mapping\n",
172
    "        # construct QNN\n",
173
    "        sampler_qnn = SamplerQNN(circuit=qc_sampler, interpret=parity,output_shape=output_shape,)\n",
174
    "        # construct classifier\n",
175
    "        sampler_classifier = NeuralNetworkClassifier(neural_network=sampler_qnn, optimizer=COBYLA(maxiter=100))\n",
176
    "        # fit classifier to data\n",
177
    "        sampler_classifier.fit(X_train, y_train)\n",
178
    "        f1_score_sampler_qnn = sampler_classifier.score(X_test, y_test)\n",
179
    "        return f1_score_sampler_qnn"
180
   ]
181
  },
182
  {
183
   "cell_type": "code",
184
   "execution_count": 15,
185
   "metadata": {},
186
   "outputs": [
187
    {
188
     "name": "stdout",
189
     "output_type": "stream",
190
     "text": [
191
      "0.01\n",
192
      "0.01\n"
193
     ]
194
    }
195
   ],
196
   "source": [
197
    "results_dict = {}\n",
198
    "for iter in range(25):\n",
199
    "    matches = [x for x in all_checkpoints if \"iter\"+str(iter)+\"_\" in x]\n",
200
    "    #iter_num = os.path.basename(all_checkpoints[0]).split('_')[2]\\n\",\n",
201
    "    x_train = np.load([x for x in matches if \"train_embedding\" in x][0])\n",
202
    "    x_test = np.load([x for x in matches if \"test_embedding\" in x][0])\n",
203
    "    y_train = np.load([x for x in matches if \"train_target\" in x][0])\n",
204
    "    y_test = np.load([x for x in matches if \"test_target\" in x][0])\n",
205
    "\n",
206
    "    f1_svc = compute_svc(x_train,\n",
207
    "                        y_train,\n",
208
    "                        x_test,\n",
209
    "                        y_test,\n",
210
    "                        c=10)\n",
211
    "    \n",
212
    "    f1_qsvc, f1_peg_qsvc = compute_QSVC(x_train, \n",
213
    "                                        y_train, \n",
214
    "                                        x_test,\n",
215
    "                                        y_test,\n",
216
    "                                        c=10,\n",
217
    "                                        pegasos=1,\n",
218
    "                                        )\n",
219
    "    \n",
220
    "    f1_qsvc= compute_QNN(x_train, \n",
221
    "                                        y_train, \n",
222
    "                                        x_test,\n",
223
    "                                        y_test,\n",
224
    "                                        c=10,\n",
225
    "                                        pegasos=1,\n",
226
    "                                        )\n",
227
    "results_dict[iter] = [f1_svc, f1_qsvc, f1_peg_qsvc]\n",
228
    "df = pd.DataFrame.from_dict(results_dict, orient='index')\n",
229
    "df.to_csv('/dccstor/boseukb/Q/ML/v2/results_comparison.csv', index=False, header=['SVC', 'QSVC', 'PEGQSVC'])"
230
   ]
231
  }
232
 ],
233
 "metadata": {
234
  "kernelspec": {
235
   "display_name": "Python 3",
236
   "language": "python",
237
   "name": "python3"
238
  },
239
  "language_info": {
240
   "codemirror_mode": {
241
    "name": "ipython",
242
    "version": 3
243
   },
244
   "file_extension": ".py",
245
   "mimetype": "text/x-python",
246
   "name": "python",
247
   "nbconvert_exporter": "python",
248
   "pygments_lexer": "ipython3",
249
   "version": "3.12.3"
250
  }
251
 },
252
 "nbformat": 4,
253
 "nbformat_minor": 2
254
}