[f539ea]: / src / ckpt_QML.ipynb

Download this file

255 lines (254 with data), 11.0 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Quantum machine learning on lower-dimensional single-cell RNAseq data\n",
    "\n",
    "\n",
    "This notebook evaluates the following quantum machine learning models:\n",
    "\n",
    "* Quantum Support Vector Machine (QSVC) https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.QSVC.html\n",
    "* Pegasos QSVC: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.PegasosQSVC.html\n",
    "* Neural Networks: https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.NeuralNetworkClassifier.html\n",
    "* Variational Quantum Classifier (VQC): https://qiskit-community.github.io/qiskit-machine-learning/stubs/qiskit_machine_learning.algorithms.VQC.html\n",
    "\n",
    "\n",
    "It takes as input the eight-dimensional embedding of the single-cell RNAseq data from the melanoma minimal residual disease sample and predicts drug-administered melanoma vs. phase II of minimal residual disease."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ====== Base class imports ======\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from glob import glob\n",
    "import matplotlib\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns \n",
    "sns.set_style('dark')\n",
    "\n",
    "# ====== Scikit-learn imports ======\n",
    "\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.metrics import (\n",
    "    auc,\n",
    "    roc_curve,\n",
    "    ConfusionMatrixDisplay,\n",
    "    f1_score,\n",
    "    balanced_accuracy_score,\n",
    ")\n",
    "from sklearn.preprocessing import StandardScaler, LabelBinarizer\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# ====== Qiskit imports ======\n",
    "\n",
    "from qiskit import QuantumCircuit\n",
    "from qiskit.circuit import Parameter\n",
    "from qiskit.circuit.library import RealAmplitudes, ZZFeatureMap\n",
    "from qiskit_algorithms.optimizers import COBYLA, L_BFGS_B\n",
    "from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier, VQC\n",
    "from qiskit_machine_learning.neural_networks import SamplerQNN, EstimatorQNN\n",
    "from qiskit_machine_learning.circuit.library import QNNCircuit\n",
    "from qiskit.circuit.library import ZZFeatureMap, ZFeatureMap, PauliFeatureMap\n",
    "from qiskit_aer import AerSimulator\n",
    "from qiskit_ibm_runtime import QiskitRuntimeService\n",
    "from qiskit_algorithms.utils import algorithm_globals\n",
    "from qiskit.primitives import Sampler\n",
    "from qiskit_ibm_runtime.fake_provider import FakeManilaV2\n",
    "from qiskit_algorithms.state_fidelities import ComputeUncompute\n",
    "from qiskit_machine_learning.kernels import FidelityQuantumKernel\n",
    "from qiskit_machine_learning.algorithms import QSVC, PegasosQSVC\n",
    "from qiskit.transpiler.preset_passmanagers import generate_preset_pass_manager\n",
    "\n",
    "\n",
    "## ====== Torch imports ======\n",
    "import torch\n",
    "import torch.nn as nn \n",
    "import pytorch_lightning as pl \n",
    "from torchmetrics.classification import F1Score\n",
    "import torch.optim as optim\n",
    "from lightning.pytorch.utilities.types import OptimizerLRScheduler\n",
    "import torch.utils.data\n",
    "from pytorch_lightning.loggers import TensorBoardLogger\n",
    "import lightning, lightning.pytorch.loggers\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather every *.ckpt file found beneath the directories whose names start with ckpt_path.\n",
    "ckpt_path = '/dccstor/boseukb/Q/ML/checkpoints/GSE116237_forQ_iter'\n",
    "# Sorted so the path order (and therefore any later `[0]` pick from a filtered\n",
    "# subset) is reproducible; glob itself returns matches in arbitrary order.\n",
    "all_checkpoints = sorted(glob(ckpt_path + '*/**/*.ckpt', recursive=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_svc(X_train, y_train, X_test, y_test, c = 1):\n",
    "    \"\"\"Fit a classical scikit-learn SVC baseline and score it on the test split.\n",
    "\n",
    "    Args:\n",
    "        X_train, y_train: training features and labels passed to ``SVC.fit``.\n",
    "        X_test, y_test: held-out features and the labels the predictions are compared against.\n",
    "        c: value forwarded to ``SVC`` as its ``C`` argument.\n",
    "\n",
    "    Returns:\n",
    "        Micro-averaged F1 score of the predictions on ``X_test``.\n",
    "    \"\"\"\n",
    "    classifier = SVC(C=c).fit(X_train, y_train)\n",
    "    predicted = classifier.predict(X_test)\n",
    "    return f1_score(y_test, predicted, average='micro')\n",
    "    \n",
    "def compute_QSVC(X_train, y_train, X_test, y_test, encoding='ZZ', c = 1, pegasos=False):\n",
    "    \"\"\"Train quantum-kernel SVMs and return their micro-averaged F1 scores on the test split.\n",
    "\n",
    "    Args:\n",
    "        X_train, y_train: training features and labels.\n",
    "        X_test, y_test: held-out features and labels.\n",
    "        encoding: feature map used by the fidelity kernel: 'ZZ' (ZZFeatureMap),\n",
    "            'Z' (ZFeatureMap) or 'P' (PauliFeatureMap). The feature dimension is\n",
    "            taken from ``X_train.shape[1]``.\n",
    "        c: value forwarded as ``C`` to both QSVC and PegasosQSVC.\n",
    "        pegasos: when truthy, a PegasosQSVC is trained and scored as well.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(f1_qsvc, f1_peg_qsvc)``. ``f1_peg_qsvc`` is 0 when ``pegasos`` is falsy.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if ``encoding`` is not one of 'ZZ', 'Z', 'P'.\n",
    "    \"\"\"\n",
    "    # The kernel is evaluated with the local reference Sampler below, so the unused\n",
    "    # QiskitRuntimeService() (which needs saved credentials) and the unused\n",
    "    # AerSimulator backend are no longer instantiated here.\n",
    "    algorithm_globals.random_seed = 12345\n",
    "\n",
    "    num_features = X_train.shape[1]\n",
    "    if encoding == 'ZZ':\n",
    "        feature_map = ZZFeatureMap(feature_dimension=num_features, reps=2, entanglement='linear')\n",
    "    elif encoding == 'Z':\n",
    "        feature_map = ZFeatureMap(feature_dimension=num_features, reps=2)\n",
    "    elif encoding == 'P':\n",
    "        feature_map = PauliFeatureMap(feature_dimension=num_features, reps=2, entanglement='linear')\n",
    "    else:\n",
    "        # An unknown code used to leave feature_map as None without any warning.\n",
    "        raise ValueError(f'Unknown encoding {encoding!r}; expected one of: ZZ, Z, P')\n",
    "\n",
    "    sampler = Sampler()\n",
    "    fidelity = ComputeUncompute(sampler=sampler)\n",
    "    Qkernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=feature_map)\n",
    "\n",
    "    # Fit, predict and score. The previous code returned the unfitted QSVC\n",
    "    # estimator object in place of a score.\n",
    "    qsvc = QSVC(quantum_kernel=Qkernel, C=c)\n",
    "    qsvc.fit(X_train, y_train)\n",
    "    labels_qsvc = qsvc.predict(X_test)\n",
    "    f1_qsvc = f1_score(y_test, labels_qsvc, average='micro')\n",
    "\n",
    "    f1_peg_qsvc = 0\n",
    "    if pegasos:\n",
    "        peg_qsvc = PegasosQSVC(quantum_kernel=Qkernel, C=c)\n",
    "        peg_qsvc_model = peg_qsvc.fit(X_train, y_train)\n",
    "        labels_peg_qsvc = peg_qsvc_model.predict(X_test)\n",
    "        f1_peg_qsvc = f1_score(y_test, labels_peg_qsvc, average='micro')\n",
    "\n",
    "    return f1_qsvc, f1_peg_qsvc\n",
    "\n",
    "def compute_estimator_QNN(X_train, y_train, X_test, y_test, primitive: str):\n",
    "    \"\"\"Train a quantum neural-network classifier and return its score on the test split.\n",
    "\n",
    "    Args:\n",
    "        X_train, y_train: training features and labels; the circuit width is\n",
    "            taken from ``X_train.shape[1]``.\n",
    "        X_test, y_test: held-out features and labels.\n",
    "        primitive: 'estimator' builds an EstimatorQNN on a default QNNCircuit;\n",
    "            'sampler' builds a SamplerQNN with a RealAmplitudes(reps=1) ansatz and a\n",
    "            parity interpretation of the measured bitstrings.\n",
    "\n",
    "    Returns:\n",
    "        The value of ``classifier.score(X_test, y_test)``.\n",
    "        NOTE(review): score() presumably follows the scikit-learn convention of mean\n",
    "        accuracy rather than F1 -- confirm before comparing with the F1 columns.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if ``primitive`` is neither 'estimator' nor 'sampler'\n",
    "            (previously the function fell through and returned None).\n",
    "    \"\"\"\n",
    "    num_features = X_train.shape[1]\n",
    "\n",
    "    if primitive == 'estimator':\n",
    "        # QNNCircuit's default ZZFeatureMap feature map and RealAmplitudes ansatz.\n",
    "        qc_qnn = QNNCircuit(num_qubits=num_features)\n",
    "        estimator_qnn = EstimatorQNN(circuit=qc_qnn)\n",
    "        # QNN maps inputs to [-1, +1]. The result of this single forward pass is\n",
    "        # discarded; the call is kept so the sequence of draws from\n",
    "        # algorithm_globals.random is the same as before.\n",
    "        estimator_qnn.forward(X_train[0, :], algorithm_globals.random.random(estimator_qnn.num_weights))\n",
    "        classifier = NeuralNetworkClassifier(estimator_qnn, optimizer=COBYLA(maxiter=100))\n",
    "    elif primitive == 'sampler':\n",
    "        # Default ZZFeatureMap feature map with a customized RealAmplitudes ansatz.\n",
    "        qc_sampler = QNNCircuit(ansatz=RealAmplitudes(num_features, reps=1))\n",
    "\n",
    "        # parity maps bitstrings to 0 or 1\n",
    "        def parity(x):\n",
    "            return \"{:b}\".format(x).count(\"1\") % 2\n",
    "\n",
    "        output_shape = 2  # number of classes, i.e. possible outcomes of the parity mapping\n",
    "        sampler_qnn = SamplerQNN(circuit=qc_sampler, interpret=parity, output_shape=output_shape)\n",
    "        classifier = NeuralNetworkClassifier(neural_network=sampler_qnn, optimizer=COBYLA(maxiter=100))\n",
    "    else:\n",
    "        raise ValueError(f'Unknown primitive {primitive!r}; expected estimator or sampler')\n",
    "\n",
    "    # fit classifier to data, then score it on the held-out split\n",
    "    classifier.fit(X_train, y_train)\n",
    "    test_score = classifier.score(X_test, y_test)\n",
    "    return test_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.01\n",
      "0.01\n"
     ]
    }
   ],
   "source": [
    "def _first_match(paths, keyword):\n",
    "    \"\"\"Return the first entry of `paths` that contains `keyword`; raise if there is none.\"\"\"\n",
    "    for path in paths:\n",
    "        if keyword in path:\n",
    "            return path\n",
    "    raise FileNotFoundError(f'no checkpoint path contains {keyword!r}')\n",
    "\n",
    "\n",
    "results_dict = {}\n",
    "# `iter_idx` rather than `iter`, so the builtin iter() is not shadowed in the kernel.\n",
    "for iter_idx in range(25):\n",
    "    matches = [x for x in all_checkpoints if f'iter{iter_idx}_' in x]\n",
    "    x_train = np.load(_first_match(matches, 'train_embedding'))\n",
    "    x_test = np.load(_first_match(matches, 'test_embedding'))\n",
    "    y_train = np.load(_first_match(matches, 'train_target'))\n",
    "    y_test = np.load(_first_match(matches, 'test_target'))\n",
    "\n",
    "    f1_svc = compute_svc(x_train, y_train, x_test, y_test, c=10)\n",
    "\n",
    "    f1_qsvc, f1_peg_qsvc = compute_QSVC(x_train, y_train, x_test, y_test, c=10, pegasos=True)\n",
    "\n",
    "    # The earlier call targeted an undefined `compute_QNN` (with SVM-only arguments)\n",
    "    # and overwrote f1_qsvc. The notebook's QNN helper is compute_estimator_QNN, and\n",
    "    # its result is kept in its own variable and column.\n",
    "    score_qnn = compute_estimator_QNN(x_train, y_train, x_test, y_test, primitive='estimator')\n",
    "\n",
    "    # Recorded inside the loop so every iteration gets a row, not only the last one.\n",
    "    results_dict[iter_idx] = [f1_svc, f1_qsvc, f1_peg_qsvc, score_qnn]\n",
    "\n",
    "df = pd.DataFrame.from_dict(results_dict, orient='index')\n",
    "df.to_csv('/dccstor/boseukb/Q/ML/v2/results_comparison.csv', index=False, header=['SVC', 'QSVC', 'PEGQSVC', 'QNN'])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}