Diff of /src/ckpt_QML.ipynb [000000] .. [b798eb]

Switch to side-by-side view

--- a
+++ b/src/ckpt_QML.ipynb
@@ -0,0 +1,254 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Quantum machine learning on lower-dimensional single-cell RNAseq data\n",
+    "\n",
+    "\n",
+    "This notebook evaluates the following quantum machine learning models:\n",
+    "\n",
+    "* Quantum Support Vector Machine (QSVC) https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.QSVC.html\n",
+    "* Pegasos QSVC: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.PegasosQSVC.html\n",
+    "* Neural Networks: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.NeuralNetworkClassifier.html\n",
+    "* Variational Quantum Classifier (VQC): https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.VQC.html\n",
+    "\n",
+    "\n",
+    "It takes as input the lower dimensional embedding of the single-cell RNAseq data with eight dimension of the melanoma minimal residual diseases sample and predicts drug-administered melanoma v/s phase II of minimal residual disease. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ====== Base class imports ======\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from glob import glob\n",
+    "import matplotlib\n",
+    "import os\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns \n",
+    "sns.set_style('dark')\n",
+    "\n",
+    "# ====== Scikit-learn imports ======\n",
+    "\n",
+    "from sklearn.svm import SVC\n",
+    "from sklearn.metrics import (\n",
+    "    auc,\n",
+    "    roc_curve,\n",
+    "    ConfusionMatrixDisplay,\n",
+    "    f1_score,\n",
+    "    balanced_accuracy_score,\n",
+    ")\n",
+    "from sklearn.preprocessing import StandardScaler, LabelBinarizer\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# ====== Qiskit imports ======\n",
+    "\n",
+    "from qiskit import QuantumCircuit\n",
+    "from qiskit.circuit import Parameter\n",
+    "from qiskit.circuit.library import RealAmplitudes, ZZFeatureMap\n",
+    "from qiskit_algorithms.optimizers import COBYLA, L_BFGS_B\n",
+    "from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier, VQC\n",
+    "from qiskit_machine_learning.neural_networks import SamplerQNN, EstimatorQNN\n",
+    "from qiskit_machine_learning.circuit.library import QNNCircuit\n",
+    "from qiskit.circuit.library import ZZFeatureMap, ZFeatureMap, PauliFeatureMap\n",
+    "from qiskit_aer import AerSimulator\n",
+    "from qiskit_ibm_runtime import QiskitRuntimeService\n",
+    "from qiskit_algorithms.utils import algorithm_globals\n",
+    "from qiskit.primitives import Sampler\n",
+    "from qiskit_ibm_runtime.fake_provider import FakeManilaV2\n",
+    "from qiskit_algorithms.state_fidelities import ComputeUncompute\n",
+    "from qiskit_machine_learning.kernels import FidelityQuantumKernel\n",
+    "from qiskit_machine_learning.algorithms import QSVC, PegasosQSVC\n",
+    "from qiskit.transpiler.preset_passmanagers import generate_preset_pass_manager\n",
+    "\n",
+    "\n",
+    "## ====== Torch imports ======\n",
+    "import torch\n",
+    "import torch.nn as nn \n",
+    "import pytorch_lightning as pl \n",
+    "from torchmetrics.classification import F1Score\n",
+    "import torch.optim as optim\n",
+    "from lightning.pytorch.utilities.types import OptimizerLRScheduler\n",
+    "import torch.utils.data\n",
+    "from pytorch_lightning.loggers import TensorBoardLogger\n",
+    "import lightning, lightning.pytorch.loggers\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load checkpoint\n",
+    "ckpt_path = '/dccstor/boseukb/Q/ML/checkpoints/GSE116237_forQ_iter'\n",
+    "fname = os.path.basename(ckpt_path)\n",
+    "all_checkpoints = []\n",
+    "for fname in glob('/dccstor/boseukb/Q/ML/checkpoints/GSE116237_forQ_iter*/**/*.ckpt', recursive=True):\n",
+    "    all_checkpoints.append(fname)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_svc(X_train, y_train, X_test, y_test, c = 1):\n",
+    "    svc = SVC(C=c)\n",
+    "    # y_train = torch.argmax(torch.tensor(y_train, dtype=torch.float32),dim=1)\n",
+    "    # y_test = torch.argmax(torch.tensor(y_test, dtype=torch.float32),dim=1)\n",
+    "    svc_vanilla = svc.fit(X_train, y_train)\n",
+    "    labels_vanilla = svc_vanilla.predict(X_test)\n",
+    "    f1_svc = f1_score(y_test, labels_vanilla, average='micro')\n",
+    "    \n",
+    "    return f1_svc\n",
+    "    \n",
+    "def compute_QSVC(X_train, y_train, X_test, y_test, encoding='ZZ', c = 1, pegasos=False):\n",
+    "    \n",
+    "    #service = QiskitRuntimeService(instance=\"accelerated-disc/internal/default\")    \n",
+    "    service = QiskitRuntimeService()    \n",
+    "    backend = AerSimulator(method='statevector')\n",
+    "    algorithm_globals.random_seed = 12345\n",
+    "\n",
+    "    feature_map = None\n",
+    "\n",
+    "    if encoding == 'ZZ' :\n",
+    "        feature_map = ZZFeatureMap(feature_dimension=X_train.shape[1], \n",
+    "                            reps=2, \n",
+    "                            entanglement='linear')\n",
+    "    else: \n",
+    "        if encoding == 'Z': \n",
+    "            feature_map = ZFeatureMap(feature_dimension=X_train.shape[1], \n",
+    "                            reps=2)\n",
+    "        if encoding == 'P': \n",
+    "            feature_map = PauliFeatureMap(feature_dimension=X_train.shape[1], \n",
+    "                            reps=2, entanglement='linear')\n",
+    "\n",
+    "    sampler = Sampler() \n",
+    "    fidelity = ComputeUncompute(sampler=sampler)\n",
+    "    Qkernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=feature_map)\n",
+    "    f1_qsvc = QSVC(quantum_kernel=Qkernel, C=c)\n",
+    "    \n",
+    "    f1_peg_qsvc = 0\n",
+    "    if pegasos == True: \n",
+    "        peg_qsvc = PegasosQSVC(quantum_kernel=Qkernel, C=c)\n",
+    "        peg_qsvc_model = peg_qsvc.fit(X_train, y_train)\n",
+    "        labels_peg_qsvc = peg_qsvc_model.predict(X_test)\n",
+    "        f1_peg_qsvc = f1_score(y_test, labels_peg_qsvc, average='micro')\n",
+    "\n",
+    "    return f1_qsvc,f1_peg_qsvc\n",
+    "\n",
+    "def compute_estimator_QNN(X_train, y_train, X_test, y_test, primitive: str):\n",
+    "    \n",
+    "    if primitive == 'estimator':\n",
+    "        # construct QNN with the QNNCircuit's default ZZFeatureMap feature map and RealAmplitudes ansatz.\n",
+    "        qc_qnn = QNNCircuit(num_qubits=X_train.shape[1])\n",
+    "\n",
+    "        estimator_qnn = EstimatorQNN(circuit=qc_qnn)\n",
+    "        # QNN maps inputs to [-1, +1]\n",
+    "        estimator_qnn.forward(X_train[0, :], algorithm_globals.random.random(estimator_qnn.num_weights))\n",
+    "        # construct neural network classifier\n",
+    "        estimator_classifier = NeuralNetworkClassifier(estimator_qnn, optimizer=COBYLA(maxiter=100))\n",
+    "        # fit classifier to data\n",
+    "        estimator_classifier.fit(X_train, y_train)\n",
+    "        f1_score_estimator_qnn = estimator_classifier.score(X_test, y_test)\n",
+    "        return f1_score_estimator_qnn\n",
+    "    \n",
+    "    if primitive == 'sampler':\n",
+    "        # construct a quantum circuit from the default ZZFeatureMap feature map and a customized RealAmplitudes ansatz\n",
+    "        qc_sampler = QNNCircuit(ansatz=RealAmplitudes(X_train.shape[1], reps=1))\n",
+    "        # parity maps bitstrings to 0 or 1\n",
+    "        def parity(x):\n",
+    "            return \"{:b}\".format(x).count(\"1\") % 2\n",
+    "        output_shape = 2  # corresponds to the number of classes, possible outcomes of the (parity) mapping\n",
+    "        # construct QNN\n",
+    "        sampler_qnn = SamplerQNN(circuit=qc_sampler, interpret=parity,output_shape=output_shape,)\n",
+    "        # construct classifier\n",
+    "        sampler_classifier = NeuralNetworkClassifier(neural_network=sampler_qnn, optimizer=COBYLA(maxiter=100))\n",
+    "        # fit classifier to data\n",
+    "        sampler_classifier.fit(X_train, y_train)\n",
+    "        f1_score_sampler_qnn = sampler_classifier.score(X_test, y_test)\n",
+    "        return f1_score_sampler_qnn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.01\n",
+      "0.01\n"
+     ]
+    }
+   ],
+   "source": [
+    "results_dict = {}\n",
+    "for iter in range(25):\n",
+    "    matches = [x for x in all_checkpoints if \"iter\"+str(iter)+\"_\" in x]\n",
+    "    #iter_num = os.path.basename(all_checkpoints[0]).split('_')[2]\\n\",\n",
+    "    x_train = np.load([x for x in matches if \"train_embedding\" in x][0])\n",
+    "    x_test = np.load([x for x in matches if \"test_embedding\" in x][0])\n",
+    "    y_train = np.load([x for x in matches if \"train_target\" in x][0])\n",
+    "    y_test = np.load([x for x in matches if \"test_target\" in x][0])\n",
+    "\n",
+    "    f1_svc = compute_svc(x_train,\n",
+    "                        y_train,\n",
+    "                        x_test,\n",
+    "                        y_test,\n",
+    "                        c=10)\n",
+    "    \n",
+    "    f1_qsvc, f1_peg_qsvc = compute_QSVC(x_train, \n",
+    "                                        y_train, \n",
+    "                                        x_test,\n",
+    "                                        y_test,\n",
+    "                                        c=10,\n",
+    "                                        pegasos=1,\n",
+    "                                        )\n",
+    "    \n",
+    "    f1_qsvc= compute_QNN(x_train, \n",
+    "                                        y_train, \n",
+    "                                        x_test,\n",
+    "                                        y_test,\n",
+    "                                        c=10,\n",
+    "                                        pegasos=1,\n",
+    "                                        )\n",
+    "results_dict[iter] = [f1_svc, f1_qsvc, f1_peg_qsvc]\n",
+    "df = pd.DataFrame.from_dict(results_dict, orient='index')\n",
+    "df.to_csv('/dccstor/boseukb/Q/ML/v2/results_comparison.csv', index=False, header=['SVC', 'QSVC', 'PEGQSVC'])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}