--- a +++ b/notebooks/stats.ipynb @@ -0,0 +1,159 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + " dataset cutoff algorithm rank_auc \n", + " geis_250.00 :30 1 visit :210 LR :210 Min. : 1.0 \n", + " geis_250.00_cutoff182:30 1 year :210 RF :210 1st Qu.: 8.0 \n", + " geis_250.00_cutoff365:30 6 months:210 XGB:210 Median :15.5 \n", + " geis_250.40 :30 Mean :15.5 \n", + " geis_250.40_cutoff182:30 3rd Qu.:23.0 \n", + " geis_250.40_cutoff365:30 Max. :30.0 \n", + " geis_327.23 :30 \n", + " geis_327.23_cutoff182:30 \n", + " geis_327.23_cutoff365:30 \n", + " geis_331.0 :30 \n", + " geis_331.0_cutoff182 :30 \n", + " geis_331.0_cutoff365 :30 \n", + " geis_530.81 :30 \n", + " geis_530.81_cutoff182:30 \n", + " geis_530.81_cutoff365:30 \n", + " geis_571.8 :30 \n", + " geis_571.8_cutoff182 :30 \n", + " geis_571.8_cutoff365 :30 \n", + " geis_585.9 :30 \n", + " geis_585.9_cutoff182 :30 \n", + " geis_585.9_cutoff365 :30 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "setwd('/media/bill/Drive/projects/geis-ehr/analysis')\n", + "\n", + "df <- read.csv(\"auc_rankings.csv\",header=TRUE,sep=',')\n", + "# df <- subset(df,!is.na(rank))\n", + "# df <- subset(df,dataset!='505_tecator')\n", + "# df <- subset(df,algorithm != 'LR')\n", + "summary(df,maxsum=21)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "\tPairwise comparisons using Wilcoxon signed rank test \n", + "\n", + "data: dfs$rank_auc and dfs$algorithm \n", + "\n", + " LR RF \n", + "RF 1.0e-12 - \n", + "XGB 1.1e-12 7.8e-12\n", + "\n", + "P value adjustment method: bonferroni " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "dfs <- subset(df,cutoff=='1 visit')\n", + "pairwise.wilcox.test(dfs$rank_auc, dfs$algorithm, p.adjust.method = 'bonferroni',\n", + " paired = T)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "\tPairwise comparisons using Wilcoxon signed rank test \n", + "\n", + "data: dfs$rank_auc and dfs$algorithm \n", + "\n", + " LR RF \n", + "RF 1.0e-12 - \n", + "XGB 1.0e-12 2.3e-12\n", + "\n", + "P value adjustment method: bonferroni " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "dfs <- subset(df,cutoff=='6 months')\n", + "pairwise.wilcox.test(dfs$rank_auc, dfs$algorithm, p.adjust.method = 'bonferroni',\n", + " paired = T)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "\tPairwise comparisons using Wilcoxon signed rank test \n", + "\n", + "data: dfs$rank_auc and dfs$algorithm \n", + "\n", + " LR RF \n", + "RF 1.0e-12 - \n", + "XGB 1.0e-12 9.5e-12\n", + "\n", + "P value adjustment method: bonferroni " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "dfs <- subset(df,cutoff=='1 year')\n", + "pairwise.wilcox.test(dfs$rank_auc, dfs$algorithm, p.adjust.method = 'bonferroni',\n", + " paired = T)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "codemirror_mode": "r", + "file_extension": ".r", + "mimetype": "text/x-r-source", + "name": "R", + "pygments_lexer": "r", + "version": "3.5.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}