--- a
+++ b/Analysis top20 vs bottom20.ipynb
@@ -0,0 +1,211 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "8KSBPEA9iZc1"
+   },
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "Initialize environment and import necessary libraries for the detailed comparison\n",
+    "of top 20% vs bottom 20% diverse head and neck cancer clinical trials.\n",
+    "\"\"\"\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "import plotly\n",
+    "import plotly.express as px"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "U1XH-hqSieep"
+   },
+   "outputs": [],
+   "source": [
+    "# Configuration: input files and the figure width shared by every plot.\n",
+    "TOP_CSV = \"t20.csv\"\n",
+    "BOTTOM_CSV = \"b20.csv\"\n",
+    "FIG_WIDTH = 700\n",
+    "\n",
+    "# Load preprocessed datasets of top 20% and bottom 20% diverse studies\n",
+    "df_top = pd.read_csv(TOP_CSV)\n",
+    "df_bottom = pd.read_csv(BOTTOM_CSV)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 183,
+     "status": "ok",
+     "timestamp": 1711119148341,
+     "user": {
+      "displayName": "Ojasvi Vachharajani",
+      "userId": "08925121883437033531"
+     },
+     "user_tz": 420
+    },
+    "id": "i2oWqy-Fiehy",
+    "outputId": "599a5439-bf8a-437b-b404-10b0393e84ee"
+   },
+   "outputs": [],
+   "source": [
+    "# Both cohorts must share one schema: pd.concat unions mismatched columns and\n",
+    "# fills the gaps with NaN, so fail loudly here instead of just displaying a bool.\n",
+    "mismatched_columns = set(df_top.columns) ^ set(df_bottom.columns)\n",
+    "assert not mismatched_columns, f\"Column mismatch between cohorts: {sorted(mismatched_columns)}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "sMugRtp8pli-"
+   },
+   "outputs": [],
+   "source": [
+    "# Tag each cohort and stack both into one frame. `assign` leaves the loaded\n",
+    "# frames untouched; `ignore_index=True` gives the result unique row labels\n",
+    "# instead of repeating each CSV's own default index.\n",
+    "df_all = pd.concat(\n",
+    "    [\n",
+    "        df_top.assign(success_category=\"top\"),\n",
+    "        df_bottom.assign(success_category=\"bottom\"),\n",
+    "    ],\n",
+    "    ignore_index=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "executionInfo": {
+     "elapsed": 4,
+     "status": "ok",
+     "timestamp": 1711119288331,
+     "user": {
+      "displayName": "Ojasvi Vachharajani",
+      "userId": "08925121883437033531"
+     },
+     "user_tz": 420
+    },
+    "id": "he-X6Ez4qCzq",
+    "outputId": "b7f15feb-b680-4ca4-d754-2fd769fc0629"
+   },
+   "outputs": [],
+   "source": [
+    "# List the combined columns available for the plots below\n",
+    "df_all.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 542
+    },
+    "executionInfo": {
+     "elapsed": 2,
+     "status": "ok",
+     "timestamp": 1711119330500,
+     "user": {
+      "displayName": "Ojasvi Vachharajani",
+      "userId": "08925121883437033531"
+     },
+     "user_tz": 420
+    },
+    "id": "w4ohZwM8t3kU",
+    "outputId": "033642de-9d3c-417b-f713-4ac567a3833a"
+   },
+   "outputs": [],
+   "source": [
+    "# Distribution of total participant counts, split by cohort\n",
+    "px.box(\n",
+    "    df_all,\n",
+    "    x=\"success_category\",\n",
+    "    y=\"num_participants\",\n",
+    "    title=\"Number of participants: top 20% vs bottom 20% diverse trials\",\n",
+    "    labels={\"success_category\": \"Diversity cohort\", \"num_participants\": \"Participants\"},\n",
+    "    width=FIG_WIDTH,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 542
+    },
+    "executionInfo": {
+     "elapsed": 221,
+     "status": "ok",
+     "timestamp": 1711119560043,
+     "user": {
+      "displayName": "Ojasvi Vachharajani",
+      "userId": "08925121883437033531"
+     },
+     "user_tz": 420
+    },
+    "id": "fvVRp9sxt3nd",
+    "outputId": "4b43d996-9082-4da0-e42b-d37264c8f0ef"
+   },
+   "outputs": [],
+   "source": [
+    "# Reshape the per-sex counts from wide to long (one row per source row and sex)\n",
+    "# so the box plot can split each cohort by sex. The columns are selected first\n",
+    "# because df_all already has a `num_participants` column, and melt rejects a\n",
+    "# value_name that clashes with an existing column.\n",
+    "SEX_COLUMNS = {\"num_male_participants\": \"male\", \"num_female_participants\": \"female\"}\n",
+    "\n",
+    "df_num_participants = (\n",
+    "    df_all[[\"success_category\", *SEX_COLUMNS]]\n",
+    "    .rename(columns=SEX_COLUMNS)\n",
+    "    .melt(id_vars=\"success_category\", var_name=\"sex\", value_name=\"num_participants\")\n",
+    ")\n",
+    "\n",
+    "px.box(\n",
+    "    df_num_participants,\n",
+    "    x=\"success_category\",\n",
+    "    y=\"num_participants\",\n",
+    "    color=\"sex\",\n",
+    "    title=\"Participants by sex: top 20% vs bottom 20% diverse trials\",\n",
+    "    labels={\"success_category\": \"Diversity cohort\", \"num_participants\": \"Participants\", \"sex\": \"Sex\"},\n",
+    "    width=FIG_WIDTH,\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "authorship_tag": "ABX9TyMDSF+xer7CPEwEYBcCijkb",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}