180 lines (179 with data), 9.4 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VAE_FCTAE_EM:\tclust_iCluster: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \n",
"0 0.8571429 1\n",
"AE_FAETC_EM:\tclust_iCluster: 3 3 3 1 3 3 3 3 3 3 1 1 3 3 1 1 3 1 1 1 1 3 1 2 1 2 3 1 3 3 1 3 1 1 1 1 1 3 1 1 1 3 1 1 3 1 3 2 3 3 3 1 3 1 3 1 1 1 3 2 1 1 3 1 1 1 3 1 1 1 1 3 1 1 1 3 1 1 1 3 1 3 3 1 1 3 2 1 3 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \n",
"0 0.6115702 0\n",
"AE_FCTAE_EM:\tclust_iCluster: 1 1 1 2 1 1 1 1 1 1 2 2 1 1 2 2 1 1 2 2 2 1 1 2 2 2 2 1 1 1 1 1 2 2 1 2 1 1 1 1 2 1 1 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 2 2 2 1 1 2 1 2 1 1 1 1 1 2 1 1 2 1 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 2 2 3 2 3 3 3 3 3 2 3 2 2 3 3 2 2 3 3 2 2 3 3 3 3 3 3 3 3 3 3 3 2 2 3 3 3 3 3 2 2 3 2 3 3 2 2 2 3 3 3 3 3 3 2 3 2 3 3 3 3 3 3 2 2 2 3 2 2 3 3 2 \n",
"0 0.4242424 0.6486486\n",
"DAE_FAETC_EM:\tclust_iCluster: 1 1 1 2 1 1 1 1 1 1 2 2 1 1 2 2 1 2 2 2 2 1 2 3 2 3 1 2 1 2 2 1 2 2 2 2 2 1 2 2 2 1 2 2 1 2 1 3 1 1 1 2 2 2 1 2 2 2 1 3 2 2 1 2 2 2 1 2 2 2 2 1 2 2 2 1 2 2 2 1 2 1 1 2 2 1 3 2 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 \n",
"0 0 0.6115702\n",
"DAE_FCTAE_EM:\tclust_iCluster: 1 1 1 1 3 3 1 3 1 1 1 1 1 3 1 1 1 1 1 1 1 3 1 2 1 1 1 1 1 1 1 1 1 1 3 1 1 3 1 1 1 1 1 1 1 1 3 2 3 1 1 1 1 1 3 1 1 1 1 2 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 3 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 \n",
"0.00862069 0.6271186 0\n",
"SVAE_FCTAE_EM:\tclust_iCluster: 1 1 2 1 2 2 2 2 1 1 3 1 2 2 1 1 2 1 1 1 1 2 2 3 3 3 2 1 1 2 1 2 1 1 2 1 1 2 1 2 1 2 1 1 1 2 2 3 2 1 2 1 1 2 2 2 2 2 2 3 1 1 2 2 1 1 2 1 1 1 1 2 1 2 1 2 2 1 2 2 1 2 2 1 2 1 3 3 2 1 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 1 1 3 3 1 1 3 2 2 1 1 1 1 2 2 2 1 1 2 1 3 1 2 1 1 3 2 1 1 3 1 1 2 1 1 1 1 1 2 1 2 3 1 1 1 2 1 1 3 1 1 1 1 1 1 2 1 2 1 3 2 1 1 1 1 1 2 1 1 1 1 1 1 3 3 1 1 1 1 1 1 \n",
"0.4065041 0.1403509 0.6034483\n",
"MMDVAE_EM:\tclust_iCluster: 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \n",
"1 0 0.9130435\n"
]
}
],
"source": [
"library(\"clusterCrit\")\n",
"data_names<-c('VAE_FCTAE_EM','AE_FAETC_EM', 'AE_FCTAE_EM', 'DAE_FAETC_EM', 'DAE_FCTAE_EM','SVAE_FCTAE_EM','MMDVAE_EM')\n",
"for(data_name in data_names){\n",
" cat(data_name)\n",
" cat(':\\t')\n",
" \n",
" number_cl=3\n",
" ind <- 0\n",
" JI_final <- matrix(data=NA, nrow=number_cl, ncol=1)\n",
" #cat(JI_final,\"\\n\")\n",
" # Read clusters imposed on simulated data \n",
" \n",
" cl <- as.matrix(read.table(\"../data/single-cell/celltype2.txt\", \n",
" sep=\"\\t\", header=FALSE))\n",
" cl2 <- as.matrix(as.numeric(cl[,2]))\n",
" rownames(cl2) <- cl[,1]\n",
" #cat(cl2)\n",
"\n",
" factor_path=paste(\"../data/single-cell/\",data_name,'.txt',sep='')\n",
" factorization=read.table(factor_path, sep=\" \",row.names=cl[,1], header=FALSE)\n",
"\n",
"\n",
" factors <- factorization\n",
"\n",
" # Clustering by Kmeans\n",
" JI_good <- numeric(0)\n",
" for (run in 1:1) {\n",
" kmeans.out <- kmeans(factors, centers=number_cl) \n",
" clust_iCluster <- as.matrix(kmeans.out$cluster)\n",
" cat(\"clust_iCluster:\",clust_iCluster,\"\\n\")\n",
" ######creation sets of samples\n",
" JI_mat <- numeric(0)\n",
" for (p in 1:number_cl) {\n",
" x1 <- rownames(clust_iCluster)[which(clust_iCluster[,1]==p)]\n",
" #print(x1)\n",
"\n",
" row <- numeric(0)\n",
" for(j in 1:number_cl) {\n",
" x2 <- rownames(cl2)[which(cl2[,1]==j)]\n",
" #cat(\"x2:\",x2,\"\\n\")\n",
" I <- length(intersect(x1,x2))#交集\n",
" #cat(\"I:\",I,\"\\n\")\n",
" S <- I/(length(x1)+length(x2)-I)\n",
" #cat(\"S:\",S,\"\\n\")\n",
" row <- cbind(row,S)\n",
" #cat(\"row:\",row,\"\\n\")\n",
" }\n",
" JI_mat <- rbind(JI_mat,row)\n",
" #cat(\"JI_mat:\",JI_mat,\"\\n\")\n",
" #print(JI_mat)\n",
"\n",
"\n",
" }\n",
" JI_good <- rbind(JI_good,apply(JI_mat,1,max))\n",
" #cat(\"JI_good:\",JI_good,\"\\n\")\n",
" }\n",
" JI_final[1:number_cl] <- apply(JI_good,2,mean)\n",
" cat(JI_final)\n",
" cat('\\n')\n",
" \n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"equal\n",
"5\n",
"VAE_FCTAE_EM:\t0.0544775\tAE_FAETC_EM:\t0.01141185\tAE_FCTAE_EM:\t0.08357372\tDAE_FAETC_EM:\t0.04257227\tDAE_FCTAE_EM:\t0.1270777\tSVAE_FCTAE_EM:\t0.2863522\tMMDVAE_EM:\t0.1695007\t\n",
"10\n",
"VAE_FCTAE_EM:\t0.07769484\tAE_FAETC_EM:\t0.1117876\tAE_FCTAE_EM:\t0.2016015\tDAE_FAETC_EM:\t0.1228961\tDAE_FCTAE_EM:\t0.2104492\tSVAE_FCTAE_EM:\t0.3060561\tMMDVAE_EM:\t0.1946011\t\n",
"15\n",
"VAE_FCTAE_EM:\t0.09746227\tAE_FAETC_EM:\t0.2486277\tAE_FCTAE_EM:\t0.2577513\tDAE_FAETC_EM:\t0.1078301\tDAE_FCTAE_EM:\t0.1926133\tSVAE_FCTAE_EM:\t0.3067726\tMMDVAE_EM:\t0.1547294\t\n",
"heterogeneous\n",
"5\n",
"VAE_FCTAE_EM:\t0.06913109\tAE_FAETC_EM:\t0.02427257\tAE_FCTAE_EM:\t0.1161989\tDAE_FAETC_EM:\t0.1392568\tDAE_FCTAE_EM:\t0.0851285\tSVAE_FCTAE_EM:\t0.251067\tMMDVAE_EM:\t0.1270979\t\n",
"10\n",
"VAE_FCTAE_EM:\t0.07044105\tAE_FAETC_EM:\t0.07937024\tAE_FCTAE_EM:\t0.2033531\tDAE_FAETC_EM:\t0.2020112\tDAE_FCTAE_EM:\t0.1552261\tSVAE_FCTAE_EM:\t0.2597297\tMMDVAE_EM:\t0.1631524\t\n",
"15\n",
"VAE_FCTAE_EM:\t0.1046205\tAE_FAETC_EM:\t0.08161738\tAE_FCTAE_EM:\t0.2291148\tDAE_FAETC_EM:\t0.1344086\tDAE_FCTAE_EM:\t0.1581898\tSVAE_FCTAE_EM:\t0.3091611\tMMDVAE_EM:\t0.06387829\t\n"
]
}
],
"source": [
"library(\"clusterCrit\")\n",
"datatypes<-c(\"equal\",\"heterogeneous\")\n",
"typenums<-c(5,10,15)\n",
"data_names<-c('VAE_FCTAE_EM','AE_FAETC_EM', 'AE_FCTAE_EM', 'DAE_FAETC_EM', 'DAE_FCTAE_EM','SVAE_FCTAE_EM','MMDVAE_EM')\n",
"\n",
"for(datatype in datatypes){\n",
" cat(datatype)\n",
" cat('\\n')\n",
" for(typenum in typenums){\n",
" cat(typenum)\n",
" cat('\\n')\n",
" for(data_name in data_names){\n",
" cat(data_name)\n",
" cat(':\\t')\n",
" \n",
" datapath=paste(\"../data/simulations/\",datatype,'/',typenum,'/',data_name,'_',typenum,'.txt',sep='')\n",
" factorization=read.table(datapath, sep=\" \", header=FALSE)\n",
" factors=factorization\n",
" all_c_index <- numeric(0)\n",
" for (run in 1:1) {\n",
" \n",
" kmeans.out <- kmeans(factors, centers=number_cl) \n",
" clust_iCluster <- kmeans.out$cluster\n",
" c_index <- numeric(0)\n",
" c_index <- c(c_index, intCriteria(as.matrix(factors),clust_iCluster, crit=c(\"C_index\"))$c_index)\n",
"\n",
" cat(c_index)\n",
" cat('\\t')\n",
" }\n",
" cat('\\n')\n",
" }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}