[0b32b6]: / R-scripts / scripts / .ipynb_checkpoints / 未命名4-checkpoint.ipynb

Download this file

180 lines (179 with data), 9.4 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "VAE_FCTAE_EM:\tclust_iCluster: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \n",
      "0 0.8571429 1\n",
      "AE_FAETC_EM:\tclust_iCluster: 3 3 3 1 3 3 3 3 3 3 1 1 3 3 1 1 3 1 1 1 1 3 1 2 1 2 3 1 3 3 1 3 1 1 1 1 1 3 1 1 1 3 1 1 3 1 3 2 3 3 3 1 3 1 3 1 1 1 3 2 1 1 3 1 1 1 3 1 1 1 1 3 1 1 1 3 1 1 1 3 1 3 3 1 1 3 2 1 3 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \n",
      "0 0.6115702 0\n",
      "AE_FCTAE_EM:\tclust_iCluster: 1 1 1 2 1 1 1 1 1 1 2 2 1 1 2 2 1 1 2 2 2 1 1 2 2 2 2 1 1 1 1 1 2 2 1 2 1 1 1 1 2 1 1 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 2 2 2 1 1 2 1 2 1 1 1 1 1 2 1 1 2 1 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 2 2 3 2 3 3 3 3 3 2 3 2 2 3 3 2 2 3 3 2 2 3 3 3 3 3 3 3 3 3 3 3 2 2 3 3 3 3 3 2 2 3 2 3 3 2 2 2 3 3 3 3 3 3 2 3 2 3 3 3 3 3 3 2 2 2 3 2 2 3 3 2 \n",
      "0 0.4242424 0.6486486\n",
      "DAE_FAETC_EM:\tclust_iCluster: 1 1 1 2 1 1 1 1 1 1 2 2 1 1 2 2 1 2 2 2 2 1 2 3 2 3 1 2 1 2 2 1 2 2 2 2 2 1 2 2 2 1 2 2 1 2 1 3 1 1 1 2 2 2 1 2 2 2 1 3 2 2 1 2 2 2 1 2 2 2 2 1 2 2 2 1 2 2 2 1 2 1 1 2 2 1 3 2 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \n",
      "0 0 0.6115702\n",
      "DAE_FCTAE_EM:\tclust_iCluster: 1 1 1 1 3 3 1 3 1 1 1 1 1 3 1 1 1 1 1 1 1 3 1 2 1 1 1 1 1 1 1 1 1 1 3 1 1 3 1 1 1 1 1 1 1 1 3 2 3 1 1 1 1 1 3 1 1 1 1 2 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 3 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \n",
      "0.00862069 0.6271186 0\n",
      "SVAE_FCTAE_EM:\tclust_iCluster: 1 1 2 1 2 2 2 2 1 1 3 1 2 2 1 1 2 1 1 1 1 2 2 3 3 3 2 1 1 2 1 2 1 1 2 1 1 2 1 2 1 2 1 1 1 2 2 3 2 1 2 1 1 2 2 2 2 2 2 3 1 1 2 2 1 1 2 1 1 1 1 2 1 2 1 2 2 1 2 2 1 2 2 1 2 1 3 3 2 1 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 1 1 3 3 1 1 3 2 2 1 1 1 1 2 2 2 1 1 2 1 3 1 2 1 1 3 2 1 1 3 1 1 2 1 1 1 1 1 2 1 2 3 1 1 1 2 1 1 3 1 1 1 1 1 1 2 1 2 1 3 2 1 1 1 1 1 2 1 1 1 1 1 1 3 3 1 1 1 1 1 1 \n",
      "0.4065041 0.1403509 0.6034483\n",
      "MMDVAE_EM:\tclust_iCluster: 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \n",
      "1 0 0.9130435\n"
     ]
    }
   ],
   "source": [
    "library(\"clusterCrit\")\n",
    "\n",
    "# For every embedding in data_names: cluster the samples with k-means and\n",
    "# print, for each k-means cluster, its best Jaccard index against any group\n",
    "# of samples sharing a reference label in celltype2.txt.\n",
    "data_names <- c('VAE_FCTAE_EM','AE_FAETC_EM', 'AE_FCTAE_EM', 'DAE_FAETC_EM', 'DAE_FCTAE_EM','SVAE_FCTAE_EM','MMDVAE_EM')\n",
    "number_cl <- 3  # number of k-means centers\n",
    "n_runs <- 1     # k-means repetitions whose scores are averaged per embedding\n",
    "set.seed(1)     # k-means starts from random centers; fix the seed so reruns match\n",
    "\n",
    "# The reference labels do not depend on the embedding, so read them once\n",
    "# instead of once per loop iteration.\n",
    "cl  <- as.matrix(read.table(\"../data/single-cell/celltype2.txt\",\n",
    "                            sep=\"\\t\",  header=FALSE))\n",
    "cl2 <- as.matrix(as.numeric(cl[,2]))\n",
    "rownames(cl2) <- cl[,1]\n",
    "\n",
    "# Compare against the label values that actually occur in the file. The\n",
    "# former hard-coded 1:number_cl silently skipped every label outside that\n",
    "# range, which gives a Jaccard index of 0 for the clusters matching them.\n",
    "# NOTE(review): presumably the labels are 0-based -- confirm against celltype2.txt.\n",
    "ref_labels <- sort(unique(cl2[,1]))\n",
    "\n",
    "# Jaccard index |a n b| / |a u b| of two vectors of sample names.\n",
    "jaccard <- function(a, b) {\n",
    "    shared <- length(intersect(a, b))  # size of the intersection\n",
    "    shared / (length(a) + length(b) - shared)\n",
    "}\n",
    "\n",
    "for (data_name in data_names) {\n",
    "    cat(data_name)\n",
    "    cat(':\\t')\n",
    "\n",
    "    # One row per sample, in the same order as the label file.\n",
    "    factor_path <- paste(\"../data/single-cell/\",data_name,'.txt',sep='')\n",
    "    factors <- read.table(factor_path, sep=\" \",row.names=cl[,1],  header=FALSE)\n",
    "\n",
    "    # Clustering by k-means; one row of JI_good per run.\n",
    "    JI_good <- numeric(0)\n",
    "    for (run in seq_len(n_runs)) {\n",
    "        kmeans.out <- kmeans(factors, centers=number_cl)\n",
    "        clust_iCluster <- as.matrix(kmeans.out$cluster)\n",
    "        cat(\"clust_iCluster:\",clust_iCluster,\"\\n\")\n",
    "\n",
    "        # JI_mat[p, j]: Jaccard index of k-means cluster p vs. reference group j.\n",
    "        JI_mat <- matrix(NA_real_, nrow=number_cl, ncol=length(ref_labels))\n",
    "        for (p in seq_len(number_cl)) {\n",
    "            x1 <- rownames(clust_iCluster)[which(clust_iCluster[,1]==p)]\n",
    "            for (j in seq_along(ref_labels)) {\n",
    "                x2 <- rownames(cl2)[which(cl2[,1]==ref_labels[j])]\n",
    "                JI_mat[p, j] <- jaccard(x1, x2)\n",
    "            }\n",
    "        }\n",
    "        # Keep, for each k-means cluster, its best-matching reference group.\n",
    "        JI_good <- rbind(JI_good, apply(JI_mat,1,max))\n",
    "    }\n",
    "\n",
    "    # Average over runs and print one score per k-means cluster.\n",
    "    JI_final <- colMeans(JI_good)\n",
    "    cat(JI_final)\n",
    "    cat('\\n')\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "equal\n",
      "5\n",
      "VAE_FCTAE_EM:\t0.0544775\tAE_FAETC_EM:\t0.01141185\tAE_FCTAE_EM:\t0.08357372\tDAE_FAETC_EM:\t0.04257227\tDAE_FCTAE_EM:\t0.1270777\tSVAE_FCTAE_EM:\t0.2863522\tMMDVAE_EM:\t0.1695007\t\n",
      "10\n",
      "VAE_FCTAE_EM:\t0.07769484\tAE_FAETC_EM:\t0.1117876\tAE_FCTAE_EM:\t0.2016015\tDAE_FAETC_EM:\t0.1228961\tDAE_FCTAE_EM:\t0.2104492\tSVAE_FCTAE_EM:\t0.3060561\tMMDVAE_EM:\t0.1946011\t\n",
      "15\n",
      "VAE_FCTAE_EM:\t0.09746227\tAE_FAETC_EM:\t0.2486277\tAE_FCTAE_EM:\t0.2577513\tDAE_FAETC_EM:\t0.1078301\tDAE_FCTAE_EM:\t0.1926133\tSVAE_FCTAE_EM:\t0.3067726\tMMDVAE_EM:\t0.1547294\t\n",
      "heterogeneous\n",
      "5\n",
      "VAE_FCTAE_EM:\t0.06913109\tAE_FAETC_EM:\t0.02427257\tAE_FCTAE_EM:\t0.1161989\tDAE_FAETC_EM:\t0.1392568\tDAE_FCTAE_EM:\t0.0851285\tSVAE_FCTAE_EM:\t0.251067\tMMDVAE_EM:\t0.1270979\t\n",
      "10\n",
      "VAE_FCTAE_EM:\t0.07044105\tAE_FAETC_EM:\t0.07937024\tAE_FCTAE_EM:\t0.2033531\tDAE_FAETC_EM:\t0.2020112\tDAE_FCTAE_EM:\t0.1552261\tSVAE_FCTAE_EM:\t0.2597297\tMMDVAE_EM:\t0.1631524\t\n",
      "15\n",
      "VAE_FCTAE_EM:\t0.1046205\tAE_FAETC_EM:\t0.08161738\tAE_FCTAE_EM:\t0.2291148\tDAE_FAETC_EM:\t0.1344086\tDAE_FCTAE_EM:\t0.1581898\tSVAE_FCTAE_EM:\t0.3091611\tMMDVAE_EM:\t0.06387829\t\n"
     ]
    }
   ],
   "source": [
    "library(\"clusterCrit\")\n",
    "\n",
    "# For every simulated dataset (datatype x typenum x embedding): run k-means on\n",
    "# the embedding and print the C-index of the resulting partition.\n",
    "datatypes<-c(\"equal\",\"heterogeneous\")\n",
    "typenums<-c(5,10,15)\n",
    "data_names<-c('VAE_FCTAE_EM','AE_FAETC_EM', 'AE_FCTAE_EM', 'DAE_FAETC_EM', 'DAE_FCTAE_EM','SVAE_FCTAE_EM','MMDVAE_EM')\n",
    "\n",
    "# Defined here so the cell runs on a fresh kernel; it used to rely on a value\n",
    "# of number_cl left behind by another cell.\n",
    "# NOTE(review): typenum presumably is the simulated cluster count -- confirm\n",
    "# whether centers should be typenum rather than a fixed 3.\n",
    "number_cl <- 3\n",
    "n_runs <- 1   # k-means repetitions whose C-index values are averaged\n",
    "set.seed(1)   # k-means starts from random centers; fix the seed so reruns match\n",
    "\n",
    "for(datatype in datatypes){\n",
    "    cat(datatype)\n",
    "    cat('\\n')\n",
    "    for(typenum in typenums){\n",
    "        cat(typenum)\n",
    "        cat('\\n')\n",
    "        for(data_name in data_names){\n",
    "            cat(data_name)\n",
    "            cat(':\\t')\n",
    "\n",
    "            datapath <- paste(\"../data/simulations/\",datatype,'/',typenum,'/',data_name,'_',typenum,'.txt',sep='')\n",
    "            factors <- read.table(datapath, sep=\" \", header=FALSE)\n",
    "\n",
    "            all_c_index <- numeric(0)\n",
    "            for (run in seq_len(n_runs)) {\n",
    "                kmeans.out <- kmeans(factors, centers=number_cl)\n",
    "                clust_iCluster <- kmeans.out$cluster\n",
    "                all_c_index <- c(all_c_index, intCriteria(as.matrix(factors),clust_iCluster, crit=c(\"C_index\"))$c_index)\n",
    "            }  # this closing brace was missing, which made the cell a parse error\n",
    "\n",
    "            # Mean over runs; with n_runs = 1 this is the single C-index value.\n",
    "            cat(mean(all_c_index))\n",
    "            cat('\\t')\n",
    "        }\n",
    "        cat('\\n')\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}