Switch to unified view

a b/Analysis top20 vs bottom20.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": null,
6
   "metadata": {
7
    "id": "8KSBPEA9iZc1"
8
   },
9
   "outputs": [],
10
   "source": [
11
    "\"\"\"\n",
12
    "Initialize environment and import necessary libraries for the detailed comparison\n",
13
    "of top 20% vs bottom 20% diverse head and neck cancer clinical trials.\n",
14
    "\"\"\"\n",
15
    "\n",
16
    "import numpy as np\n",
17
    "import pandas as pd\n",
18
    "\n",
19
    "import plotly\n",
20
    "import plotly.express as px"
21
   ]
22
  },
23
  {
24
   "cell_type": "code",
25
   "execution_count": null,
26
   "metadata": {
27
    "id": "U1XH-hqSieep"
28
   },
29
   "outputs": [],
30
   "source": [
31
    "# Read the two preprocessed cohorts: the top 20% and the bottom 20% of studies ranked by diversity\n",
32
    "df_top = pd.read_csv(filepath_or_buffer=\"t20.csv\")\n",
33
    "df_bottom = pd.read_csv(filepath_or_buffer=\"b20.csv\")"
34
   ]
35
  },
36
  {
37
   "cell_type": "code",
38
   "execution_count": null,
39
   "metadata": {
40
    "colab": {
41
     "base_uri": "https://localhost:8080/"
42
    },
43
    "executionInfo": {
44
     "elapsed": 183,
45
     "status": "ok",
46
     "timestamp": 1711119148341,
47
     "user": {
48
      "displayName": "Ojasvi Vachharajani",
49
      "userId": "08925121883437033531"
50
     },
51
     "user_tz": 420
52
    },
53
    "id": "i2oWqy-Fiehy",
54
    "outputId": "599a5439-bf8a-437b-b404-10b0393e84ee"
55
   },
56
   "outputs": [],
57
   "source": [
58
    "assert set(df_top.columns) == set(df_bottom.columns), f\"schema mismatch: {set(df_top.columns) ^ set(df_bottom.columns)}\"  # fail fast: concat would silently NaN-pad columns present in only one frame"
59
   ]
60
  },
61
  {
62
   "cell_type": "code",
63
   "execution_count": null,
64
   "metadata": {
65
    "id": "sMugRtp8pli-"
66
   },
67
   "outputs": [],
68
   "source": [
69
    "df_top[\"success_category\"] = \"top\"\n",
70
    "df_bottom[\"success_category\"] = \"bottom\"\n",
71
    "# Renumber rows on stacking: each CSV was read with its own default 0..n-1 index, so a plain concat repeats labels\n",
72
    "df_all = pd.concat([df_top, df_bottom], ignore_index=True)"
73
   ]
74
  },
75
  {
76
   "cell_type": "code",
77
   "execution_count": null,
78
   "metadata": {
79
    "colab": {
80
     "base_uri": "https://localhost:8080/"
81
    },
82
    "executionInfo": {
83
     "elapsed": 4,
84
     "status": "ok",
85
     "timestamp": 1711119288331,
86
     "user": {
87
      "displayName": "Ojasvi Vachharajani",
88
      "userId": "08925121883437033531"
89
     },
90
     "user_tz": 420
91
    },
92
    "id": "he-X6Ez4qCzq",
93
    "outputId": "b7f15feb-b680-4ca4-d754-2fd769fc0629"
94
   },
95
   "outputs": [],
96
   "source": [
97
    "df_all.columns  # show the column labels of the combined frame"
98
   ]
99
  },
100
  {
101
   "cell_type": "code",
102
   "execution_count": null,
103
   "metadata": {
104
    "colab": {
105
     "base_uri": "https://localhost:8080/",
106
     "height": 542
107
    },
108
    "executionInfo": {
109
     "elapsed": 2,
110
     "status": "ok",
111
     "timestamp": 1711119330500,
112
     "user": {
113
      "displayName": "Ojasvi Vachharajani",
114
      "userId": "08925121883437033531"
115
     },
116
     "user_tz": 420
117
    },
118
    "id": "w4ohZwM8t3kU",
119
    "outputId": "033642de-9d3c-417b-f713-4ac567a3833a"
120
   },
121
   "outputs": [],
122
   "source": [
123
    "px.box(data_frame=df_all, x=\"success_category\", y=\"num_participants\", width=700)"
124
   ]
125
  },
126
  {
127
   "cell_type": "code",
128
   "execution_count": null,
129
   "metadata": {
130
    "colab": {
131
     "base_uri": "https://localhost:8080/",
132
     "height": 542
133
    },
134
    "executionInfo": {
135
     "elapsed": 221,
136
     "status": "ok",
137
     "timestamp": 1711119560043,
138
     "user": {
139
      "displayName": "Ojasvi Vachharajani",
140
      "userId": "08925121883437033531"
141
     },
142
     "user_tz": 420
143
    },
144
    "id": "fvVRp9sxt3nd",
145
    "outputId": "4b43d996-9082-4da0-e42b-d37264c8f0ef"
146
   },
147
   "outputs": [],
148
   "source": [
149
    "# Reshape to long format: one copy of the rows per sex, with that sex's count column renamed to\n",
150
    "# num_participants and a `sex` label column added, so plotly can colour the boxes by sex.\n",
151
    "df_num_participants = pd.concat(\n",
152
    "    [df_all[[\"success_category\", f\"num_{sex}_participants\"]].assign(sex=sex).rename(columns={f\"num_{sex}_participants\": \"num_participants\"}) for sex in (\"male\", \"female\")],\n",
153
    "    ignore_index=True,  # every df_all row label would otherwise appear once per sex\n",
154
    ")\n",
155
    "\n",
156
    "px.box(df_num_participants, x=\"success_category\", y=\"num_participants\", color=\"sex\").update_layout(width=700)"
157
   ]
158
  }
159
 ],
160
 "metadata": {
161
  "colab": {
162
   "authorship_tag": "ABX9TyMDSF+xer7CPEwEYBcCijkb",
163
   "provenance": []
164
  },
165
  "kernelspec": {
166
   "display_name": "Python 3",
167
   "name": "python3"
168
  },
169
  "language_info": {
170
   "name": "python"
171
  }
172
 },
173
 "nbformat": 4,
174
 "nbformat_minor": 0
175
}