176 lines (175 with data), 4.1 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8KSBPEA9iZc1"
},
"outputs": [],
"source": [
"\"\"\"\n",
"Initialize environment and import necessary libraries for the detailed comparison\n",
"of top 20% vs bottom 20% diverse head and neck cancer clinical trials.\n",
"\"\"\"\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import plotly\n",
"import plotly.express as px"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "U1XH-hqSieep"
},
"outputs": [],
"source": [
"# Load preprocessed datasets of top 20% and bottom 20% diverse studies\n",
"df_top = pd.read_csv(\"t20.csv\")\n",
"df_bottom = pd.read_csv(\"b20.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 183,
"status": "ok",
"timestamp": 1711119148341,
"user": {
"displayName": "Ojasvi Vachharajani",
"userId": "08925121883437033531"
},
"user_tz": 420
},
"id": "i2oWqy-Fiehy",
"outputId": "599a5439-bf8a-437b-b404-10b0393e84ee"
},
"outputs": [],
"source": [
"set(df_top.columns) == set(df_bottom.columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sMugRtp8pli-"
},
"outputs": [],
"source": [
"df_top[\"success_category\"] = \"top\"\n",
"df_bottom[\"success_category\"] = \"bottom\"\n",
"\n",
"df_all = pd.concat([df_top, df_bottom])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 4,
"status": "ok",
"timestamp": 1711119288331,
"user": {
"displayName": "Ojasvi Vachharajani",
"userId": "08925121883437033531"
},
"user_tz": 420
},
"id": "he-X6Ez4qCzq",
"outputId": "b7f15feb-b680-4ca4-d754-2fd769fc0629"
},
"outputs": [],
"source": [
"df_all.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 542
},
"executionInfo": {
"elapsed": 2,
"status": "ok",
"timestamp": 1711119330500,
"user": {
"displayName": "Ojasvi Vachharajani",
"userId": "08925121883437033531"
},
"user_tz": 420
},
"id": "w4ohZwM8t3kU",
"outputId": "033642de-9d3c-417b-f713-4ac567a3833a"
},
"outputs": [],
"source": [
"px.box(df_all, x=\"success_category\", y=\"num_participants\").update_layout(width=700)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 542
},
"executionInfo": {
"elapsed": 221,
"status": "ok",
"timestamp": 1711119560043,
"user": {
"displayName": "Ojasvi Vachharajani",
"userId": "08925121883437033531"
},
"user_tz": 420
},
"id": "fvVRp9sxt3nd",
"outputId": "4b43d996-9082-4da0-e42b-d37264c8f0ef"
},
"outputs": [],
"source": [
"df_num_participants = pd.concat(\n",
" [\n",
" df_all[[\"success_category\", \"num_male_participants\"]].assign(sex=\"male\").rename(columns={\"num_male_participants\": \"num_participants\"}),\n",
" df_all[[\"success_category\", \"num_female_participants\"]].assign(sex=\"female\").rename(columns={\"num_female_participants\": \"num_participants\"}),\n",
" ]\n",
")\n",
"\n",
"px.box(df_num_participants, x=\"success_category\", y=\"num_participants\", color=\"sex\").update_layout(width=700)"
]
}
],
"metadata": {
"colab": {
"authorship_tag": "ABX9TyMDSF+xer7CPEwEYBcCijkb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}