[75f8b6]: / Covid_Clinical_Trials_Analysis.ipynb

Download this file

2439 lines (2438 with data), 127.5 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bb78e666",
   "metadata": {},
   "source": [
    "# Assignment: 01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "947166c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e66ecaac",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the clinical-trials CSV (path is relative to the notebook's working directory)\n",
    "# into the DataFrame `df` that the following cells read.\n",
    "df = pd.read_csv(filepath_or_buffer=\"COVID clinical trials.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0fb72044",
   "metadata": {},
   "source": [
    "#### Q1. Read the dataset and explore it: check its shape and columns, and view the first/last 'n' rows using head/tail (n = 5, 15, 30)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "950e9b57",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of the dataset: (5783, 27)\n"
     ]
    }
   ],
   "source": [
    "# Show the DataFrame's shape tuple behind a fixed label.\n",
    "print(f\"Shape of the dataset: {df.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "648b76fa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Columns in the dataset: Index(['Rank', 'NCT Number', 'Title', 'Acronym', 'Status', 'Study Results',\n",
      "       'Conditions', 'Interventions', 'Outcome Measures',\n",
      "       'Sponsor/Collaborators', 'Gender', 'Age', 'Phases', 'Enrollment',\n",
      "       'Funded Bys', 'Study Type', 'Study Designs', 'Other IDs', 'Start Date',\n",
      "       'Primary Completion Date', 'Completion Date', 'First Posted',\n",
      "       'Results First Posted', 'Last Update Posted', 'Locations',\n",
      "       'Study Documents', 'URL'],\n",
      "      dtype='object')\n"
     ]
    }
   ],
   "source": [
    "# List every column label; the Index repr is printed after the label text.\n",
    "print(f\"Columns in the dataset: {df.columns}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "d87b8aeb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['All', 'Female', 'Male', nan], dtype=object)"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values present in the Gender column (missing values show up as nan).\n",
    "df.loc[:, \"Gender\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "aa5ec1f7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Gender\n",
       "All       5567\n",
       "Female     162\n",
       "Male        44\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Frequency of each Gender value, most common first; missing values are left out\n",
    "# (these are the value_counts defaults, spelled out explicitly).\n",
    "df[\"Gender\"].value_counts(sort=True, dropna=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "9da4780a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sum of duplicate rows for numeric columns:\n",
      "Rank                         0\n",
      "NCT Number                   0\n",
      "Title                        0\n",
      "Acronym                      0\n",
      "Status                       0\n",
      "Study Results                0\n",
      "Conditions                   0\n",
      "Interventions                0\n",
      "Outcome Measures             0\n",
      "Sponsor/Collaborators        0\n",
      "Gender                       0\n",
      "Age                          0\n",
      "Phases                       0\n",
      "Enrollment                 0.0\n",
      "Funded Bys                   0\n",
      "Study Type                   0\n",
      "Study Designs                0\n",
      "Other IDs                    0\n",
      "Start Date                   0\n",
      "Primary Completion Date      0\n",
      "Completion Date              0\n",
      "First Posted                 0\n",
      "Results First Posted         0\n",
      "Last Update Posted           0\n",
      "Locations                    0\n",
      "Study Documents              0\n",
      "URL                          0\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "# Define `sum_of_duplicates` here so this cell runs on a fresh kernel; no earlier\n",
    "# cell in the notebook assigns it, so Restart & Run All would raise NameError.\n",
    "# NOTE(review): the expression is inferred from the saved output (one sum per\n",
    "# column, all zero) -- confirm it matches the cell that originally created it.\n",
    "# It selects the rows flagged by df.duplicated() and sums each column, so despite\n",
    "# the label below the result covers every column, not only the numeric ones.\n",
    "sum_of_duplicates = df[df.duplicated()].sum()\n",
    "print(\"Sum of duplicate rows for numeric columns:\")\n",
    "print(sum_of_duplicates)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "652eff47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First 5 rows:    Rank   NCT Number                                              Title  \\\n",
      "0     1  NCT04785898  Diagnostic Performance of the ID Now™ COVID-19...   \n",
      "1     2  NCT04595136  Study to Evaluate the Efficacy of COVID19-0001...   \n",
      "2     3  NCT04395482  Lung CT Scan Analysis of SARS-CoV2 Induced Lun...   \n",
      "3     4  NCT04416061  The Role of a Private Hospital in Hong Kong Am...   \n",
      "4     5  NCT04395924         Maternal-foetal Transmission of SARS-Cov-2   \n",
      "\n",
      "        Acronym                  Status         Study Results  \\\n",
      "0   COVID-IDNow  Active, not recruiting  No Results Available   \n",
      "1      COVID-19      Not yet recruiting  No Results Available   \n",
      "2   TAC-COVID19              Recruiting  No Results Available   \n",
      "3      COVID-19  Active, not recruiting  No Results Available   \n",
      "4  TMF-COVID-19              Recruiting  No Results Available   \n",
      "\n",
      "                                          Conditions  \\\n",
      "0                                            Covid19   \n",
      "1                               SARS-CoV-2 Infection   \n",
      "2                                            covid19   \n",
      "3                                              COVID   \n",
      "4  Maternal Fetal Infection Transmission|COVID-19...   \n",
      "\n",
      "                                       Interventions  \\\n",
      "0   Diagnostic Test: ID Now™ COVID-19 Screening Test   \n",
      "1    Drug: Drug COVID19-0001-USR|Drug: normal saline   \n",
      "2  Other: Lung CT scan analysis in COVID-19 patients   \n",
      "3          Diagnostic Test: COVID 19 Diagnostic Test   \n",
      "4  Diagnostic Test: Diagnosis of SARS-Cov2 by RT-...   \n",
      "\n",
      "                                    Outcome Measures  \\\n",
      "0  Evaluate the diagnostic performance of the ID ...   \n",
      "1  Change on viral load results from baseline aft...   \n",
      "2  A qualitative analysis of parenchymal lung dam...   \n",
      "3  Proportion of asymptomatic subjects|Proportion...   \n",
      "4  COVID-19 by positive PCR in cord blood and / o...   \n",
      "\n",
      "                               Sponsor/Collaborators  ...         Other IDs  \\\n",
      "0              Groupe Hospitalier Paris Saint Joseph  ...       COVID-IDNow   \n",
      "1                         United Medical Specialties  ...  COVID19-0001-USR   \n",
      "2                       University of Milano Bicocca  ...       TAC-COVID19   \n",
      "3                    Hong Kong Sanatorium & Hospital  ...        RC-2020-08   \n",
      "4  Centre Hospitalier Régional d'Orléans|Centre d...  ...      CHRO-2020-10   \n",
      "\n",
      "         Start Date Primary Completion Date   Completion Date  \\\n",
      "0  November 9, 2020       December 22, 2020    April 30, 2021   \n",
      "1  November 2, 2020       December 15, 2020  January 29, 2021   \n",
      "2       May 7, 2020           June 15, 2021     June 15, 2021   \n",
      "3      May 25, 2020           July 31, 2020   August 31, 2020   \n",
      "4       May 5, 2020                May 2021          May 2021   \n",
      "\n",
      "       First Posted Results First Posted Last Update Posted  \\\n",
      "0     March 8, 2021                  NaN      March 8, 2021   \n",
      "1  October 20, 2020                  NaN   October 20, 2020   \n",
      "2      May 20, 2020                  NaN   November 9, 2020   \n",
      "3      June 4, 2020                  NaN       June 4, 2020   \n",
      "4      May 20, 2020                  NaN       June 4, 2020   \n",
      "\n",
      "                                           Locations Study Documents  \\\n",
      "0  Groupe Hospitalier Paris Saint-Joseph, Paris, ...             NaN   \n",
      "1       Cimedical, Barranquilla, Atlantico, Colombia             NaN   \n",
      "2  Ospedale Papa Giovanni XXIII, Bergamo, Italy|P...             NaN   \n",
      "3  Hong Kong Sanatorium & Hospital, Hong Kong, Ho...             NaN   \n",
      "4                       CHR Orléans, Orléans, France             NaN   \n",
      "\n",
      "                                           URL  \n",
      "0  https://ClinicalTrials.gov/show/NCT04785898  \n",
      "1  https://ClinicalTrials.gov/show/NCT04595136  \n",
      "2  https://ClinicalTrials.gov/show/NCT04395482  \n",
      "3  https://ClinicalTrials.gov/show/NCT04416061  \n",
      "4  https://ClinicalTrials.gov/show/NCT04395924  \n",
      "\n",
      "[5 rows x 27 columns]\n"
     ]
    }
   ],
   "source": [
    "# Preview the first five records; the label and the frame's text repr share one print.\n",
    "print(f\"First 5 rows: {df.head(n=5)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "60183864",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First 15 rows:     Rank   NCT Number                                              Title  \\\n",
      "0      1  NCT04785898  Diagnostic Performance of the ID Now™ COVID-19...   \n",
      "1      2  NCT04595136  Study to Evaluate the Efficacy of COVID19-0001...   \n",
      "2      3  NCT04395482  Lung CT Scan Analysis of SARS-CoV2 Induced Lun...   \n",
      "3      4  NCT04416061  The Role of a Private Hospital in Hong Kong Am...   \n",
      "4      5  NCT04395924         Maternal-foetal Transmission of SARS-Cov-2   \n",
      "5      6  NCT04516954          Convalescent Plasma for COVID-19 Patients   \n",
      "6      7  NCT04476940  COVID-19 Breastfeeding Guideline for African-A...   \n",
      "7      8  NCT04634214  The Severity of COVID 19 in Diabetes and Non-d...   \n",
      "8      9  NCT04602884  Early Detection of COVID-19 Using Breath Analysis   \n",
      "9     10  NCT04384588  COVID19-Convalescent Plasma for Treating Patie...   \n",
      "10    11  NCT04355897  CoVID-19 Plasma in Treatment of COVID-19 Patients   \n",
      "11    12  NCT04412265          Frailty in Elderly Patients With COVID-19   \n",
      "12    13  NCT04659759  COVID-19 Pregnancy Related Immunological, Clin...   \n",
      "13    14  NCT04427332     Smell and Taste Disorders in COVID-19 Patients   \n",
      "14    15  NCT04842708  Evaluation of Anti-COVID 19 Pfizer Vaccination...   \n",
      "\n",
      "         Acronym                   Status         Study Results  \\\n",
      "0    COVID-IDNow   Active, not recruiting  No Results Available   \n",
      "1       COVID-19       Not yet recruiting  No Results Available   \n",
      "2    TAC-COVID19               Recruiting  No Results Available   \n",
      "3       COVID-19   Active, not recruiting  No Results Available   \n",
      "4   TMF-COVID-19               Recruiting  No Results Available   \n",
      "5           CPCP  Enrolling by invitation  No Results Available   \n",
      "6       COVID-BF       Not yet recruiting  No Results Available   \n",
      "7        COVID19       Not yet recruiting  No Results Available   \n",
      "8       COVID-19                Suspended  No Results Available   \n",
      "9     FALP-COVID               Recruiting  No Results Available   \n",
      "10           NaN               Recruiting  No Results Available   \n",
      "11     FRA-COVID               Recruiting  No Results Available   \n",
      "12   COVID-PRICE               Recruiting  No Results Available   \n",
      "13  COVID-19 ORL                Completed  No Results Available   \n",
      "14      COVID-19               Recruiting  No Results Available   \n",
      "\n",
      "                                           Conditions  \\\n",
      "0                                             Covid19   \n",
      "1                                SARS-CoV-2 Infection   \n",
      "2                                             covid19   \n",
      "3                                               COVID   \n",
      "4   Maternal Fetal Infection Transmission|COVID-19...   \n",
      "5                                            COVID 19   \n",
      "6                     Covid19|Exclusive Breastfeeding   \n",
      "7                              Covid19|Type2 Diabetes   \n",
      "8                                             Covid19   \n",
      "9   COVID-19 Infection|Cancer Patients|General Pop...   \n",
      "10                                           COVID 19   \n",
      "11                                            Covid19   \n",
      "12                                            Covid19   \n",
      "13                                            covid19   \n",
      "14                                            Covid19   \n",
      "\n",
      "                                        Interventions  \\\n",
      "0    Diagnostic Test: ID Now™ COVID-19 Screening Test   \n",
      "1     Drug: Drug COVID19-0001-USR|Drug: normal saline   \n",
      "2   Other: Lung CT scan analysis in COVID-19 patients   \n",
      "3           Diagnostic Test: COVID 19 Diagnostic Test   \n",
      "4   Diagnostic Test: Diagnosis of SARS-Cov2 by RT-...   \n",
      "5            Biological: Convalescent COVID 19 Plasma   \n",
      "6          Behavioral: COVID-19 Breastfeeding Support   \n",
      "7                                                 NaN   \n",
      "8   Diagnostic Test: Breath biopsy sampling using ...   \n",
      "9   Biological: Convalescent Plasma from COVID-19 ...   \n",
      "10           Biological: Convalescent COVID 19 Plasma   \n",
      "11  Other: Relation between frailty and clinical o...   \n",
      "12  Other: COVID-19 exposure|Biological: COVID-19 ...   \n",
      "13  Other: Investigation of smell and taste disorders   \n",
      "14       Diagnostic Test: vaccination against COVID19   \n",
      "\n",
      "                                     Outcome Measures  \\\n",
      "0   Evaluate the diagnostic performance of the ID ...   \n",
      "1   Change on viral load results from baseline aft...   \n",
      "2   A qualitative analysis of parenchymal lung dam...   \n",
      "3   Proportion of asymptomatic subjects|Proportion...   \n",
      "4   COVID-19 by positive PCR in cord blood and / o...   \n",
      "5   Evaluate the safety|Change in requirement for ...   \n",
      "6   COVID-19 breastfeeding guidance adherence at b...   \n",
      "7   Severity of COVID 19 among people with and wit...   \n",
      "8   Correlation between Volatile Organic Compounds...   \n",
      "9   in-hospital mortality secondary to COVID-19 am...   \n",
      "10  Reduce mortality|Reduce requirement for mechan...   \n",
      "11  Development of a tool to measure frailty|A \"pr...   \n",
      "12  Maternal COVID-19 serology (IgG and IgM)|Mater...   \n",
      "13  Identification of demographic and clinical fac...   \n",
      "14  Association between breath VOCs and IgG in blo...   \n",
      "\n",
      "                                Sponsor/Collaborators  ...         Other IDs  \\\n",
      "0               Groupe Hospitalier Paris Saint Joseph  ...       COVID-IDNow   \n",
      "1                          United Medical Specialties  ...  COVID19-0001-USR   \n",
      "2                        University of Milano Bicocca  ...       TAC-COVID19   \n",
      "3                     Hong Kong Sanatorium & Hospital  ...        RC-2020-08   \n",
      "4   Centre Hospitalier Régional d'Orléans|Centre d...  ...      CHRO-2020-10   \n",
      "5   Vinmec Research Institute of Stem Cell and Gen...  ...       ISC.20.11.1   \n",
      "6                             Meharry Medical College  ...            330875   \n",
      "7   India Diabetes Research Foundation & Dr. A. Ra...  ...        IDRFARH015   \n",
      "8                   Scentech Medical Technologies Ltd  ...         Cov-2-IDF   \n",
      "9   Fundacion Arturo Lopez Perez|Confederación de ...  ...     FALP 001-2020   \n",
      "10                                The Christ Hospital  ...             20-23   \n",
      "11                       University of Milano Bicocca  ...         FRA-COVID   \n",
      "12                Thomas Jefferson University|Nemours  ...          20F.1043   \n",
      "13                       University of Milano Bicocca  ...      COVID-19 ORL   \n",
      "14                  Scentech Medical Technologies Ltd  ...  Cov-2-SMC-V-2020   \n",
      "\n",
      "            Start Date Primary Completion Date    Completion Date  \\\n",
      "0     November 9, 2020       December 22, 2020     April 30, 2021   \n",
      "1     November 2, 2020       December 15, 2020   January 29, 2021   \n",
      "2          May 7, 2020           June 15, 2021      June 15, 2021   \n",
      "3         May 25, 2020           July 31, 2020    August 31, 2020   \n",
      "4          May 5, 2020                May 2021           May 2021   \n",
      "5       August 1, 2020       November 30, 2020  December 30, 2020   \n",
      "6       September 2020            October 2021          June 2022   \n",
      "7    November 16, 2020       February 16, 2021       May 16, 2021   \n",
      "8   September 22, 2020       December 30, 2021  December 30, 2021   \n",
      "9        April 7, 2020           April 6, 2021      April 6, 2021   \n",
      "10      April 28, 2020               July 2020        August 2020   \n",
      "11      April 16, 2020           March 1, 2021      March 1, 2021   \n",
      "12   November 17, 2020       December 31, 2021      June 30, 2022   \n",
      "13       June 11, 2020        October 16, 2020   October 16, 2020   \n",
      "14   December 24, 2020        October 24, 2021   October 24, 2021   \n",
      "\n",
      "         First Posted Results First Posted Last Update Posted  \\\n",
      "0       March 8, 2021                  NaN      March 8, 2021   \n",
      "1    October 20, 2020                  NaN   October 20, 2020   \n",
      "2        May 20, 2020                  NaN   November 9, 2020   \n",
      "3        June 4, 2020                  NaN       June 4, 2020   \n",
      "4        May 20, 2020                  NaN       June 4, 2020   \n",
      "5     August 18, 2020                  NaN    August 20, 2020   \n",
      "6       July 20, 2020                  NaN      July 20, 2020   \n",
      "7   November 18, 2020                  NaN  November 18, 2020   \n",
      "8    October 26, 2020                  NaN     April 13, 2021   \n",
      "9        May 12, 2020                  NaN       May 12, 2020   \n",
      "10     April 21, 2020                  NaN       May 20, 2020   \n",
      "11       June 2, 2020                  NaN   January 22, 2021   \n",
      "12   December 9, 2020                  NaN      March 5, 2021   \n",
      "13      June 11, 2020                  NaN   January 27, 2021   \n",
      "14     April 13, 2021                  NaN     April 13, 2021   \n",
      "\n",
      "                                            Locations Study Documents  \\\n",
      "0   Groupe Hospitalier Paris Saint-Joseph, Paris, ...             NaN   \n",
      "1        Cimedical, Barranquilla, Atlantico, Colombia             NaN   \n",
      "2   Ospedale Papa Giovanni XXIII, Bergamo, Italy|P...             NaN   \n",
      "3   Hong Kong Sanatorium & Hospital, Hong Kong, Ho...             NaN   \n",
      "4                        CHR Orléans, Orléans, France             NaN   \n",
      "5   Vinmec Research Institute of Stem cell and Gen...             NaN   \n",
      "6   Meharry Medical College, Nashville, Tennessee,...             NaN   \n",
      "7   Orthomed Hospital, Chennai, Tamil Nadu, India|...             NaN   \n",
      "8   IDF COVID 19 Isolation Facility, Ashkelon, Israel             NaN   \n",
      "9   Fundacion Arturo Lopez Perez, Providencia, San...             NaN   \n",
      "10  The Christ Hospital, Cincinnati, Ohio, United ...             NaN   \n",
      "11  Dipartimento di Geriatria, Fondazione Poliambu...             NaN   \n",
      "12  Thomas Jefferson University Hospital, Philadel...             NaN   \n",
      "13      ASST Monza-Ospedale San Gerardo, Monza, Italy             NaN   \n",
      "14       Shamir Medical Center, Be'er Ya'aqov, Israel             NaN   \n",
      "\n",
      "                                            URL  \n",
      "0   https://ClinicalTrials.gov/show/NCT04785898  \n",
      "1   https://ClinicalTrials.gov/show/NCT04595136  \n",
      "2   https://ClinicalTrials.gov/show/NCT04395482  \n",
      "3   https://ClinicalTrials.gov/show/NCT04416061  \n",
      "4   https://ClinicalTrials.gov/show/NCT04395924  \n",
      "5   https://ClinicalTrials.gov/show/NCT04516954  \n",
      "6   https://ClinicalTrials.gov/show/NCT04476940  \n",
      "7   https://ClinicalTrials.gov/show/NCT04634214  \n",
      "8   https://ClinicalTrials.gov/show/NCT04602884  \n",
      "9   https://ClinicalTrials.gov/show/NCT04384588  \n",
      "10  https://ClinicalTrials.gov/show/NCT04355897  \n",
      "11  https://ClinicalTrials.gov/show/NCT04412265  \n",
      "12  https://ClinicalTrials.gov/show/NCT04659759  \n",
      "13  https://ClinicalTrials.gov/show/NCT04427332  \n",
      "14  https://ClinicalTrials.gov/show/NCT04842708  \n",
      "\n",
      "[15 rows x 27 columns]\n"
     ]
    }
   ],
   "source": [
    "# Preview the first fifteen records; the label and the frame's text repr share one print.\n",
    "print(f\"First 15 rows: {df.head(n=15)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "a207a23d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First 30 rows:     Rank   NCT Number                                              Title  \\\n",
      "0      1  NCT04785898  Diagnostic Performance of the ID Now™ COVID-19...   \n",
      "1      2  NCT04595136  Study to Evaluate the Efficacy of COVID19-0001...   \n",
      "2      3  NCT04395482  Lung CT Scan Analysis of SARS-CoV2 Induced Lun...   \n",
      "3      4  NCT04416061  The Role of a Private Hospital in Hong Kong Am...   \n",
      "4      5  NCT04395924         Maternal-foetal Transmission of SARS-Cov-2   \n",
      "5      6  NCT04516954          Convalescent Plasma for COVID-19 Patients   \n",
      "6      7  NCT04476940  COVID-19 Breastfeeding Guideline for African-A...   \n",
      "7      8  NCT04634214  The Severity of COVID 19 in Diabetes and Non-d...   \n",
      "8      9  NCT04602884  Early Detection of COVID-19 Using Breath Analysis   \n",
      "9     10  NCT04384588  COVID19-Convalescent Plasma for Treating Patie...   \n",
      "10    11  NCT04355897  CoVID-19 Plasma in Treatment of COVID-19 Patients   \n",
      "11    12  NCT04412265          Frailty in Elderly Patients With COVID-19   \n",
      "12    13  NCT04659759  COVID-19 Pregnancy Related Immunological, Clin...   \n",
      "13    14  NCT04427332     Smell and Taste Disorders in COVID-19 Patients   \n",
      "14    15  NCT04842708  Evaluation of Anti-COVID 19 Pfizer Vaccination...   \n",
      "15    16  NCT04367805  COVID-19 Infection in Patients With Hepatocell...   \n",
      "16    17  NCT04609969  Diagnostic Performance Evaluation of a Novel S...   \n",
      "17    18  NCT04817553  Impact of COVID-19 on the Clinical Outcomes an...   \n",
      "18    19  NCT04632355  Musculoskeletal Pain in Patients With Covid-19...   \n",
      "19    20  NCT04407585  Testing the Accuracy of a Digital Test to Diag...   \n",
      "20    21  NCT04657510                    Femoral frACturEs and COVID-19.   \n",
      "21    22  NCT04473170  Study Evaluating the Safety and Efficacy of Au...   \n",
      "22    23  NCT04497298  Clinical Trial to Evaluate the Safety and Immu...   \n",
      "23    24  NCT04549831     Genetic Bases of COVID-19 Clinical Variability   \n",
      "24    25  NCT04706390  Comparison of the Immune Response to Natural C...   \n",
      "25    26  NCT04632719  The MentalPlus® for Assessment and Rehabilitat...   \n",
      "26    27  NCT04427345               Predictive Factors COVID-19 Patients   \n",
      "27    28  NCT04424004  MURDOCK Cabarrus County COVID-19 Prevalence an...   \n",
      "28    29  NCT04407494  Diagnostic Value of Patient - Reported and Cli...   \n",
      "29    30  NCT04360811  Assessment of Obstetric, Fetal and Neonatal Ri...   \n",
      "\n",
      "          Acronym                   Status         Study Results  \\\n",
      "0     COVID-IDNow   Active, not recruiting  No Results Available   \n",
      "1        COVID-19       Not yet recruiting  No Results Available   \n",
      "2     TAC-COVID19               Recruiting  No Results Available   \n",
      "3        COVID-19   Active, not recruiting  No Results Available   \n",
      "4    TMF-COVID-19               Recruiting  No Results Available   \n",
      "5            CPCP  Enrolling by invitation  No Results Available   \n",
      "6        COVID-BF       Not yet recruiting  No Results Available   \n",
      "7         COVID19       Not yet recruiting  No Results Available   \n",
      "8        COVID-19                Suspended  No Results Available   \n",
      "9      FALP-COVID               Recruiting  No Results Available   \n",
      "10            NaN               Recruiting  No Results Available   \n",
      "11      FRA-COVID               Recruiting  No Results Available   \n",
      "12    COVID-PRICE               Recruiting  No Results Available   \n",
      "13   COVID-19 ORL                Completed  No Results Available   \n",
      "14       COVID-19               Recruiting  No Results Available   \n",
      "15  COVID19-CHIEF               Recruiting  No Results Available   \n",
      "16     COVID-VIRO                Completed  No Results Available   \n",
      "17     IgG4-COVID               Recruiting  No Results Available   \n",
      "18   Covid19-Pain       Not yet recruiting  No Results Available   \n",
      "19            NaN               Recruiting  No Results Available   \n",
      "20  FACE COVID-19               Recruiting  No Results Available   \n",
      "21   SENTAD-COVID                Completed  No Results Available   \n",
      "22   COVID-19-101   Active, not recruiting  No Results Available   \n",
      "23      GEN-COVID               Recruiting  No Results Available   \n",
      "24   COVID19vac-1               Recruiting  No Results Available   \n",
      "25       MP-COVID               Recruiting  No Results Available   \n",
      "26       MI-COVID               Recruiting  No Results Available   \n",
      "27           C3PI   Active, not recruiting  No Results Available   \n",
      "28     COVID-OLFA                Completed  No Results Available   \n",
      "29        COroFet               Recruiting  No Results Available   \n",
      "\n",
      "                                           Conditions  \\\n",
      "0                                             Covid19   \n",
      "1                                SARS-CoV-2 Infection   \n",
      "2                                             covid19   \n",
      "3                                               COVID   \n",
      "4   Maternal Fetal Infection Transmission|COVID-19...   \n",
      "5                                            COVID 19   \n",
      "6                     Covid19|Exclusive Breastfeeding   \n",
      "7                              Covid19|Type2 Diabetes   \n",
      "8                                             Covid19   \n",
      "9   COVID-19 Infection|Cancer Patients|General Pop...   \n",
      "10                                           COVID 19   \n",
      "11                                            Covid19   \n",
      "12                                            Covid19   \n",
      "13                                            covid19   \n",
      "14                                            Covid19   \n",
      "15                  Hepatocellular Carcinoma|COVID-19   \n",
      "16                       Covid19|SARS-CoV-2 Infection   \n",
      "17                       IgG4 Related Disease|Covid19   \n",
      "18                                            Covid19   \n",
      "19                                           Covid-19   \n",
      "20          Covid19|Femur Fracture|Fragility Fracture   \n",
      "21                Coronavirus Disease 2019 (COVID-19)   \n",
      "22                                           COVID-19   \n",
      "23                                           COVID-19   \n",
      "24                                            Covid19   \n",
      "25  Cognitive Impairment|Covid19|Cognitive Dysfunc...   \n",
      "26                                            covid19   \n",
      "27                                           COVID 19   \n",
      "28                                           COVID-19   \n",
      "29                                          Pregnancy   \n",
      "\n",
      "                                        Interventions  \\\n",
      "0    Diagnostic Test: ID Now™ COVID-19 Screening Test   \n",
      "1     Drug: Drug COVID19-0001-USR|Drug: normal saline   \n",
      "2   Other: Lung CT scan analysis in COVID-19 patients   \n",
      "3           Diagnostic Test: COVID 19 Diagnostic Test   \n",
      "4   Diagnostic Test: Diagnosis of SARS-Cov2 by RT-...   \n",
      "5            Biological: Convalescent COVID 19 Plasma   \n",
      "6          Behavioral: COVID-19 Breastfeeding Support   \n",
      "7                                                 NaN   \n",
      "8   Diagnostic Test: Breath biopsy sampling using ...   \n",
      "9   Biological: Convalescent Plasma from COVID-19 ...   \n",
      "10           Biological: Convalescent COVID 19 Plasma   \n",
      "11  Other: Relation between frailty and clinical o...   \n",
      "12  Other: COVID-19 exposure|Biological: COVID-19 ...   \n",
      "13  Other: Investigation of smell and taste disorders   \n",
      "14       Diagnostic Test: vaccination against COVID19   \n",
      "15    Diagnostic Test: nasopharyngeal Covid 19 RT-PCR   \n",
      "16  Diagnostic Test: RT-qPCR test|Diagnostic Test:...   \n",
      "17                         Other: exposure to COVID19   \n",
      "18                                   Other: Interview   \n",
      "19            Diagnostic Test: Covid-19 swab PCR test   \n",
      "20                                    Other: COVID-19   \n",
      "21  Biological: Autologous Non-Hematopoietic Perip...   \n",
      "22  Biological: Two COVID-19 vaccine candidate (TM...   \n",
      "23  Genetic: Massive parallel sequencing of host g...   \n",
      "24                       Biological: covid-19 vaccine   \n",
      "25  Device: The use of the MentalPlus® digital gam...   \n",
      "26  Other: Predictive factors for clinical respons...   \n",
      "27           Other: COVID-19 PCR and serology testing   \n",
      "28  Biological: Reporting of anosmia, ageusia and ...   \n",
      "29   Diagnostic Test: COVID 19 diagnostic test by PCR   \n",
      "\n",
      "                                     Outcome Measures  \\\n",
      "0   Evaluate the diagnostic performance of the ID ...   \n",
      "1   Change on viral load results from baseline aft...   \n",
      "2   A qualitative analysis of parenchymal lung dam...   \n",
      "3   Proportion of asymptomatic subjects|Proportion...   \n",
      "4   COVID-19 by positive PCR in cord blood and / o...   \n",
      "5   Evaluate the safety|Change in requirement for ...   \n",
      "6   COVID-19 breastfeeding guidance adherence at b...   \n",
      "7   Severity of COVID 19 among people with and wit...   \n",
      "8   Correlation between Volatile Organic Compounds...   \n",
      "9   in-hospital mortality secondary to COVID-19 am...   \n",
      "10  Reduce mortality|Reduce requirement for mechan...   \n",
      "11  Development of a tool to measure frailty|A \"pr...   \n",
      "12  Maternal COVID-19 serology (IgG and IgM)|Mater...   \n",
      "13  Identification of demographic and clinical fac...   \n",
      "14  Association between breath VOCs and IgG in blo...   \n",
      "15  Incidence of COVID-19 infection in patients wi...   \n",
      "16  Evaluation of COVID VIRO® diagnostic specifici...   \n",
      "17  Incidence of COVID-19 in IgG4 patients with pa...   \n",
      "18  Self-reported Version of the Leeds Assessment ...   \n",
      "19                               SARS-CoV-2 infection   \n",
      "20  Survival at discharge day (comparison between ...   \n",
      "21  Adverse reactions incidence.|Rate of mortality...   \n",
      "22  To assess the safety and tolerability of the C...   \n",
      "23  To identify the genetic determinants of COVID-...   \n",
      "24  immune responses|Duration and breadth of B- an...   \n",
      "25  Assessment of cognitive functions after COVID-...   \n",
      "26  Identify risk factors for intra-hospital morta...   \n",
      "27  Estimate the prevalence of COVID-19 infection ...   \n",
      "28  Diagnostic values of anosmia and ageusia for C...   \n",
      "29                  number of positive COVID-19 women   \n",
      "\n",
      "                                Sponsor/Collaborators  ...  \\\n",
      "0               Groupe Hospitalier Paris Saint Joseph  ...   \n",
      "1                          United Medical Specialties  ...   \n",
      "2                        University of Milano Bicocca  ...   \n",
      "3                     Hong Kong Sanatorium & Hospital  ...   \n",
      "4   Centre Hospitalier Régional d'Orléans|Centre d...  ...   \n",
      "5   Vinmec Research Institute of Stem Cell and Gen...  ...   \n",
      "6                             Meharry Medical College  ...   \n",
      "7   India Diabetes Research Foundation & Dr. A. Ra...  ...   \n",
      "8                   Scentech Medical Technologies Ltd  ...   \n",
      "9   Fundacion Arturo Lopez Perez|Confederación de ...  ...   \n",
      "10                                The Christ Hospital  ...   \n",
      "11                       University of Milano Bicocca  ...   \n",
      "12                Thomas Jefferson University|Nemours  ...   \n",
      "13                       University of Milano Bicocca  ...   \n",
      "14                  Scentech Medical Technologies Ltd  ...   \n",
      "15  Centre Hospitalier Universitaire, Amiens|Centr...  ...   \n",
      "16              Centre Hospitalier Régional d'Orléans  ...   \n",
      "17                    Chinese University of Hong Kong  ...   \n",
      "18                 Fondazione Don Carlo Gnocchi Onlus  ...   \n",
      "19  King's College London|Zoe Global Limited|Depar...  ...   \n",
      "20                       Istituto Ortopedico Galeazzi  ...   \n",
      "21                        Abu Dhabi Stem Cells Center  ...   \n",
      "22  Institut Pasteur|Themis Bioscience GmbH|Coalit...  ...   \n",
      "23  University of Siena|Policlinico San Matteo Pav...  ...   \n",
      "24  University of Bergen|Haukeland University Hosp...  ...   \n",
      "25                            University of Sao Paulo  ...   \n",
      "26                       University of Milano Bicocca  ...   \n",
      "27  Duke University|North Carolina Department of H...  ...   \n",
      "28                   University Hospital, Montpellier  ...   \n",
      "29                      University Hospital, Toulouse  ...   \n",
      "\n",
      "                    Other IDs          Start Date Primary Completion Date  \\\n",
      "0                 COVID-IDNow    November 9, 2020       December 22, 2020   \n",
      "1            COVID19-0001-USR    November 2, 2020       December 15, 2020   \n",
      "2                 TAC-COVID19         May 7, 2020           June 15, 2021   \n",
      "3                  RC-2020-08        May 25, 2020           July 31, 2020   \n",
      "4                CHRO-2020-10         May 5, 2020                May 2021   \n",
      "5                 ISC.20.11.1      August 1, 2020       November 30, 2020   \n",
      "6                      330875      September 2020            October 2021   \n",
      "7                  IDRFARH015   November 16, 2020       February 16, 2021   \n",
      "8                   Cov-2-IDF  September 22, 2020       December 30, 2021   \n",
      "9               FALP 001-2020       April 7, 2020           April 6, 2021   \n",
      "10                      20-23      April 28, 2020               July 2020   \n",
      "11                  FRA-COVID      April 16, 2020           March 1, 2021   \n",
      "12                   20F.1043   November 17, 2020       December 31, 2021   \n",
      "13               COVID-19 ORL       June 11, 2020        October 16, 2020   \n",
      "14           Cov-2-SMC-V-2020   December 24, 2020        October 24, 2021   \n",
      "15            PI2020_843_0042      April 27, 2020            October 2020   \n",
      "16               CHRO-2020-18    October 13, 2020        October 17, 2020   \n",
      "17                 IgG4-COVID      March 24, 2021               July 2021   \n",
      "18               Covid19-Pain    December 1, 2020           July 31, 2021   \n",
      "19  Covid-19 Validation Study        June 1, 2020            May 10, 2021   \n",
      "20              FACE COVID-19   November 14, 2020        December 1, 2020   \n",
      "21    CT.001.1.0.SENTAD-COVID       April 4, 2020            May 20, 2020   \n",
      "22    2020-016|2020-002973-89     August 10, 2020               June 2021   \n",
      "23                      16917       April 8, 2020           April 8, 2021   \n",
      "24             COVID-19vacc-1    January 12, 2021        January 12, 2022   \n",
      "25                MentalPlus®    November 8, 2020       December 29, 2020   \n",
      "26                   MI-COVID      April 30, 2020          April 30, 2021   \n",
      "27                Pro00105703        June 9, 2020           June 30, 2021   \n",
      "28             RECHMPL20_0176       March 1, 2020           April 1, 2020   \n",
      "29               RC31/20/0123      April 17, 2020              April 2021   \n",
      "\n",
      "      Completion Date        First Posted Results First Posted  \\\n",
      "0      April 30, 2021       March 8, 2021                  NaN   \n",
      "1    January 29, 2021    October 20, 2020                  NaN   \n",
      "2       June 15, 2021        May 20, 2020                  NaN   \n",
      "3     August 31, 2020        June 4, 2020                  NaN   \n",
      "4            May 2021        May 20, 2020                  NaN   \n",
      "5   December 30, 2020     August 18, 2020                  NaN   \n",
      "6           June 2022       July 20, 2020                  NaN   \n",
      "7        May 16, 2021   November 18, 2020                  NaN   \n",
      "8   December 30, 2021    October 26, 2020                  NaN   \n",
      "9       April 6, 2021        May 12, 2020                  NaN   \n",
      "10        August 2020      April 21, 2020                  NaN   \n",
      "11      March 1, 2021        June 2, 2020                  NaN   \n",
      "12      June 30, 2022    December 9, 2020                  NaN   \n",
      "13   October 16, 2020       June 11, 2020                  NaN   \n",
      "14   October 24, 2021      April 13, 2021                  NaN   \n",
      "15       October 2020      April 29, 2020                  NaN   \n",
      "16   October 17, 2020    October 30, 2020                  NaN   \n",
      "17       October 2021      March 26, 2021                  NaN   \n",
      "18      July 31, 2021   November 17, 2020                  NaN   \n",
      "19       May 10, 2021        May 29, 2020                  NaN   \n",
      "20  December 31, 2020    December 8, 2020                  NaN   \n",
      "21      July 14, 2020       July 16, 2020                  NaN   \n",
      "22          July 2021      August 4, 2020                  NaN   \n",
      "23      April 8, 2026  September 16, 2020                  NaN   \n",
      "24   January 12, 2024    January 12, 2021                  NaN   \n",
      "25  December 29, 2023   November 17, 2020                  NaN   \n",
      "26     April 30, 2021       June 11, 2020                  NaN   \n",
      "27      June 30, 2021        June 9, 2020                  NaN   \n",
      "28     April 30, 2020        May 29, 2020                  NaN   \n",
      "29         April 2022      April 24, 2020                  NaN   \n",
      "\n",
      "    Last Update Posted                                          Locations  \\\n",
      "0        March 8, 2021  Groupe Hospitalier Paris Saint-Joseph, Paris, ...   \n",
      "1     October 20, 2020       Cimedical, Barranquilla, Atlantico, Colombia   \n",
      "2     November 9, 2020  Ospedale Papa Giovanni XXIII, Bergamo, Italy|P...   \n",
      "3         June 4, 2020  Hong Kong Sanatorium & Hospital, Hong Kong, Ho...   \n",
      "4         June 4, 2020                       CHR Orléans, Orléans, France   \n",
      "5      August 20, 2020  Vinmec Research Institute of Stem cell and Gen...   \n",
      "6        July 20, 2020  Meharry Medical College, Nashville, Tennessee,...   \n",
      "7    November 18, 2020  Orthomed Hospital, Chennai, Tamil Nadu, India|...   \n",
      "8       April 13, 2021  IDF COVID 19 Isolation Facility, Ashkelon, Israel   \n",
      "9         May 12, 2020  Fundacion Arturo Lopez Perez, Providencia, San...   \n",
      "10        May 20, 2020  The Christ Hospital, Cincinnati, Ohio, United ...   \n",
      "11    January 22, 2021  Dipartimento di Geriatria, Fondazione Poliambu...   \n",
      "12       March 5, 2021  Thomas Jefferson University Hospital, Philadel...   \n",
      "13    January 27, 2021      ASST Monza-Ospedale San Gerardo, Monza, Italy   \n",
      "14      April 13, 2021       Shamir Medical Center, Be'er Ya'aqov, Israel   \n",
      "15      April 29, 2020                         CHU Amiens, Amiens, France   \n",
      "16    December 7, 2020  Centre Hospitalier Régional d'Orléans, France,...   \n",
      "17      March 26, 2021  Prince of Wales Hospital, The Chinese Universi...   \n",
      "18   November 19, 2020                 Jorge Hugo Villafañe, Milan, Italy   \n",
      "19       June 24, 2020      King's College London, London, United Kingdom   \n",
      "20    December 8, 2020  IRCCS Istituto Ortopedico Galeazzi, Milano, Italy   \n",
      "21       July 16, 2020  Abu Dhabi Stem Cells Center, Abu Dhabi, United...   \n",
      "22      April 13, 2021  SGS Life Sciences, Clinical Pharmacology Unit,...   \n",
      "23  September 17, 2020                  University of Siena, Siena, Italy   \n",
      "24    January 12, 2021               University of Bergen, Bergen, Norway   \n",
      "25   November 17, 2020  Livia Stocco Sanches Valentin, São Paulo, SP, ...   \n",
      "26    November 9, 2020      ASST Monza-Ospedale San Gerardo, Monza, Italy   \n",
      "27   November 17, 2020  Duke CTSI Translational Population Health Offi...   \n",
      "28        June 1, 2020                 Uhmontpellier, Montpellier, France   \n",
      "29      April 24, 2020  University Hospital of Toulouse, Toulouse, France   \n",
      "\n",
      "                                      Study Documents  \\\n",
      "0                                                 NaN   \n",
      "1                                                 NaN   \n",
      "2                                                 NaN   \n",
      "3                                                 NaN   \n",
      "4                                                 NaN   \n",
      "5                                                 NaN   \n",
      "6                                                 NaN   \n",
      "7                                                 NaN   \n",
      "8                                                 NaN   \n",
      "9                                                 NaN   \n",
      "10                                                NaN   \n",
      "11                                                NaN   \n",
      "12                                                NaN   \n",
      "13                                                NaN   \n",
      "14                                                NaN   \n",
      "15                                                NaN   \n",
      "16                                                NaN   \n",
      "17                                                NaN   \n",
      "18                                                NaN   \n",
      "19  \"Statistical Analysis Plan\", https://ClinicalT...   \n",
      "20                                                NaN   \n",
      "21                                                NaN   \n",
      "22                                                NaN   \n",
      "23                                                NaN   \n",
      "24                                                NaN   \n",
      "25                                                NaN   \n",
      "26                                                NaN   \n",
      "27                                                NaN   \n",
      "28                                                NaN   \n",
      "29                                                NaN   \n",
      "\n",
      "                                            URL  \n",
      "0   https://ClinicalTrials.gov/show/NCT04785898  \n",
      "1   https://ClinicalTrials.gov/show/NCT04595136  \n",
      "2   https://ClinicalTrials.gov/show/NCT04395482  \n",
      "3   https://ClinicalTrials.gov/show/NCT04416061  \n",
      "4   https://ClinicalTrials.gov/show/NCT04395924  \n",
      "5   https://ClinicalTrials.gov/show/NCT04516954  \n",
      "6   https://ClinicalTrials.gov/show/NCT04476940  \n",
      "7   https://ClinicalTrials.gov/show/NCT04634214  \n",
      "8   https://ClinicalTrials.gov/show/NCT04602884  \n",
      "9   https://ClinicalTrials.gov/show/NCT04384588  \n",
      "10  https://ClinicalTrials.gov/show/NCT04355897  \n",
      "11  https://ClinicalTrials.gov/show/NCT04412265  \n",
      "12  https://ClinicalTrials.gov/show/NCT04659759  \n",
      "13  https://ClinicalTrials.gov/show/NCT04427332  \n",
      "14  https://ClinicalTrials.gov/show/NCT04842708  \n",
      "15  https://ClinicalTrials.gov/show/NCT04367805  \n",
      "16  https://ClinicalTrials.gov/show/NCT04609969  \n",
      "17  https://ClinicalTrials.gov/show/NCT04817553  \n",
      "18  https://ClinicalTrials.gov/show/NCT04632355  \n",
      "19  https://ClinicalTrials.gov/show/NCT04407585  \n",
      "20  https://ClinicalTrials.gov/show/NCT04657510  \n",
      "21  https://ClinicalTrials.gov/show/NCT04473170  \n",
      "22  https://ClinicalTrials.gov/show/NCT04497298  \n",
      "23  https://ClinicalTrials.gov/show/NCT04549831  \n",
      "24  https://ClinicalTrials.gov/show/NCT04706390  \n",
      "25  https://ClinicalTrials.gov/show/NCT04632719  \n",
      "26  https://ClinicalTrials.gov/show/NCT04427345  \n",
      "27  https://ClinicalTrials.gov/show/NCT04424004  \n",
      "28  https://ClinicalTrials.gov/show/NCT04407494  \n",
      "29  https://ClinicalTrials.gov/show/NCT04360811  \n",
      "\n",
      "[30 rows x 27 columns]\n"
     ]
    }
   ],
   "source": [
    "# Preview the first 30 rows of the dataset.\n",
    "print(\"First 30 rows:\", df.head(n=30))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "772fdab3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First 5 rows:       Rank   NCT Number                                              Title  \\\n",
      "5778  5779  NCT04011644  Mobile Health for Alcohol Use Disorders in Cli...   \n",
      "5779  5780  NCT04681339  Antibiotic Prescription in Children Hospitaliz...   \n",
      "5780  5781  NCT04740229  Moderate-intensity Flow-based Yoga Effects on ...   \n",
      "5781  5782  NCT04804917           3-year Follow-up of the Mind My Mind RCT   \n",
      "5782  5783  NCT04680000  Chronic Pain Management In Primary Care Using ...   \n",
      "\n",
      "           Acronym              Status         Study Results  \\\n",
      "5778           NaN          Recruiting  No Results Available   \n",
      "5779           NaN  Not yet recruiting  No Results Available   \n",
      "5780           NaN          Recruiting  No Results Available   \n",
      "5781  MindMyMindFU          Recruiting  No Results Available   \n",
      "5782           NaN  Not yet recruiting  No Results Available   \n",
      "\n",
      "                                             Conditions  \\\n",
      "5778                      Alcohol Drinking|Telemedicine   \n",
      "5779  Community Acquired Pneumonia in Children|Antib...   \n",
      "5780                               Stress|Psychological   \n",
      "5781  Emotional Problem|Anxiety Disorder of Childhoo...   \n",
      "5782                                       Chronic Pain   \n",
      "\n",
      "                                          Interventions  \\\n",
      "5778  Behavioral: A-CHESS self-monitored|Behavioral:...   \n",
      "5779  Other: Antibiotic treatment|Other: No antibiot...   \n",
      "5780                                   Behavioral: Yoga   \n",
      "5781                                                NaN   \n",
      "5782  Behavioral: Brief Cognitive Behavioral Therapy...   \n",
      "\n",
      "                                       Outcome Measures  \\\n",
      "5778  Number of risky drinking days|Number of patien...   \n",
      "5779  Antibiotic treatment rates in hospitalized chi...   \n",
      "5780  Perceived Stress|Task switching paradigm|Digit...   \n",
      "5781  The child's impact of mental health problems r...   \n",
      "5782  Defense and Veterans Pain Rating Scale (DVPRS)...   \n",
      "\n",
      "                                  Sponsor/Collaborators  ...  \\\n",
      "5778  University of Wisconsin, Madison|National Inst...  ...   \n",
      "5779  ARCIM Institute Academic Research in Complemen...  ...   \n",
      "5780         University of Illinois at Urbana-Champaign  ...   \n",
      "5781  Mental Health Services in the Capital Region, ...  ...   \n",
      "5782  The University of Texas Health Science Center ...  ...   \n",
      "\n",
      "                                              Other IDs         Start Date  \\\n",
      "5778  2019-0337|R01AA024150|A532007|SMPH/FAMILY MED/...     March 23, 2020   \n",
      "5779                                             PKA-03         April 2021   \n",
      "5780                                              21584  February 10, 2021   \n",
      "5781                         MHSCRDenmark, F-61502-03-1     March 22, 2021   \n",
      "5782                                       HSC20200520H      February 2021   \n",
      "\n",
      "     Primary Completion Date    Completion Date       First Posted  \\\n",
      "5778             August 2022         April 2023       July 8, 2019   \n",
      "5779           November 2024      December 2024  December 23, 2020   \n",
      "5780               July 2021          July 2021   February 5, 2021   \n",
      "5781            May 31, 2022  December 31, 2022     March 18, 2021   \n",
      "5782           February 2024      February 2025  December 22, 2020   \n",
      "\n",
      "     Results First Posted Last Update Posted  \\\n",
      "5778                  NaN      April 2, 2021   \n",
      "5779                  NaN  February 10, 2021   \n",
      "5780                  NaN  February 24, 2021   \n",
      "5781                  NaN      April 1, 2021   \n",
      "5782                  NaN  December 22, 2020   \n",
      "\n",
      "                                              Locations Study Documents  \\\n",
      "5778  UW Health at the American Center, Madison, Wis...             NaN   \n",
      "5779  Die Filderklinik, Filderstadt, Baden-Württembe...             NaN   \n",
      "5780  University of Illinois at Urbana-Champaign, Ur...             NaN   \n",
      "5781  Mental Health Services in the Capital Region, ...             NaN   \n",
      "5782  Uniformed Services University for the Health S...             NaN   \n",
      "\n",
      "                                              URL  \n",
      "5778  https://ClinicalTrials.gov/show/NCT04011644  \n",
      "5779  https://ClinicalTrials.gov/show/NCT04681339  \n",
      "5780  https://ClinicalTrials.gov/show/NCT04740229  \n",
      "5781  https://ClinicalTrials.gov/show/NCT04804917  \n",
      "5782  https://ClinicalTrials.gov/show/NCT04680000  \n",
      "\n",
      "[5 rows x 27 columns]\n"
     ]
    }
   ],
   "source": [
    "# Show the last 5 rows of the dataset (tail, not head).\n",
    "print(\"Last 5 rows:\", df.tail(5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "0efa034f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First 15 rows:       Rank   NCT Number                                              Title  \\\n",
      "5768  5769  NCT04734795  The Prevalence of Dysfunctional Breathing in C...   \n",
      "5769  5770  NCT04190368  Team Clinic: Virtual Expansion of an Innovativ...   \n",
      "5770  5771  NCT03392883  Scaling Up Science-based Mental Health Interve...   \n",
      "5771  5772  NCT04301518  Prematurity Risk Assessment Combined With Clin...   \n",
      "5772  5773  NCT04607902  Harnessing Network Science to Personalize Scal...   \n",
      "5773  5774  NCT04639661  Predictors of Periodontal Outcomes Post-sanati...   \n",
      "5774  5775  NCT04180709  CBT to Reduce Insomnia and Improve Social Reco...   \n",
      "5775  5776  NCT04335643  Telehealth CBT for Adolescents and Young Adult...   \n",
      "5776  5777  NCT04589377  Mindfulness to Mitigate Psychological Threat a...   \n",
      "5777  5778  NCT04574466  Scaling-up Psychological Interventions With Sy...   \n",
      "5778  5779  NCT04011644  Mobile Health for Alcohol Use Disorders in Cli...   \n",
      "5779  5780  NCT04681339  Antibiotic Prescription in Children Hospitaliz...   \n",
      "5780  5781  NCT04740229  Moderate-intensity Flow-based Yoga Effects on ...   \n",
      "5781  5782  NCT04804917           3-year Follow-up of the Mind My Mind RCT   \n",
      "5782  5783  NCT04680000  Chronic Pain Management In Primary Care Using ...   \n",
      "\n",
      "           Acronym                   Status         Study Results  \\\n",
      "5768           NaN               Recruiting  No Results Available   \n",
      "5769           NaN       Not yet recruiting  No Results Available   \n",
      "5770         DIADA   Active, not recruiting  No Results Available   \n",
      "5771         PRIME               Recruiting  No Results Available   \n",
      "5772           NaN               Recruiting  No Results Available   \n",
      "5773           NaN  Enrolling by invitation  No Results Available   \n",
      "5774         CRISP               Recruiting  No Results Available   \n",
      "5775          cSLE               Recruiting  No Results Available   \n",
      "5776           NaN               Recruiting  No Results Available   \n",
      "5777           NaN               Recruiting  No Results Available   \n",
      "5778           NaN               Recruiting  No Results Available   \n",
      "5779           NaN       Not yet recruiting  No Results Available   \n",
      "5780           NaN               Recruiting  No Results Available   \n",
      "5781  MindMyMindFU               Recruiting  No Results Available   \n",
      "5782           NaN       Not yet recruiting  No Results Available   \n",
      "\n",
      "                                             Conditions  \\\n",
      "5768  Dysfunctional Breathing|Asthma in Children|Asthma   \n",
      "5769                                    Type 1 Diabetes   \n",
      "5770                 Depression|Problematic Alcohol Use   \n",
      "5771                        Preterm Labor|Preterm Birth   \n",
      "5772                                         Depression   \n",
      "5773            Periodontal Diseases|Periodontal Pocket   \n",
      "5774                Psychotic Disorders|Psychosis|Sleep   \n",
      "5775                       Systemic Lupus Erythematosus   \n",
      "5776                                             Stress   \n",
      "5777  Distress|PTSD|Anxiety|Depression|Trauma|Functi...   \n",
      "5778                      Alcohol Drinking|Telemedicine   \n",
      "5779  Community Acquired Pneumonia in Children|Antib...   \n",
      "5780                               Stress|Psychological   \n",
      "5781  Emotional Problem|Anxiety Disorder of Childhoo...   \n",
      "5782                                       Chronic Pain   \n",
      "\n",
      "                                          Interventions  \\\n",
      "5768                                                NaN   \n",
      "5769       Other: Team Clinic Care|Other: Standard Care   \n",
      "5770                                  Behavioral: Laddr   \n",
      "5771            Other: Multimodal intervention strategy   \n",
      "5772  Behavioral: Supportive Therapy SSI|Behavioral:...   \n",
      "5773                                                NaN   \n",
      "5774                                    Device: Sleepio   \n",
      "5775                                  Behavioral: TEACH   \n",
      "5776                   Behavioral: Mindfulness Training   \n",
      "5777                Behavioral: Problem Management Plus   \n",
      "5778  Behavioral: A-CHESS self-monitored|Behavioral:...   \n",
      "5779  Other: Antibiotic treatment|Other: No antibiot...   \n",
      "5780                                   Behavioral: Yoga   \n",
      "5781                                                NaN   \n",
      "5782  Behavioral: Brief Cognitive Behavioral Therapy...   \n",
      "\n",
      "                                       Outcome Measures  \\\n",
      "5768  Prevalence of children with dysfunctional brea...   \n",
      "5769  Hemoglobin A1C at Baseline|Hemoglobin A1C Prog...   \n",
      "5770  The Integrated Measure of Implementation Conte...   \n",
      "5771  Neonatal morbidity/mortality|Length of neonata...   \n",
      "5772  Change in adolescent depressive symptom severi...   \n",
      "5773  Probing depth|Bleeding on probing|Tooth Loss|O...   \n",
      "5774  Change from baseline Work and Social Adjustmen...   \n",
      "5775  Recruitment rates of the study|Retention rates...   \n",
      "5776  Mean differences in Psychological Threat measu...   \n",
      "5777  Change in psychological distress|Change in pos...   \n",
      "5778  Number of risky drinking days|Number of patien...   \n",
      "5779  Antibiotic treatment rates in hospitalized chi...   \n",
      "5780  Perceived Stress|Task switching paradigm|Digit...   \n",
      "5781  The child's impact of mental health problems r...   \n",
      "5782  Defense and Veterans Pain Rating Scale (DVPRS)...   \n",
      "\n",
      "                                  Sponsor/Collaborators  ...  \\\n",
      "5768  Kolding Sygehus|Odense Patient Data Explorativ...  ...   \n",
      "5769  Children's Hospital Los Angeles|University of ...  ...   \n",
      "5770  Dartmouth-Hitchcock Medical Center|Pontificia ...  ...   \n",
      "5771  Sera Prognostics, Inc.|High Risk Pregnancy Cen...  ...   \n",
      "5772                             Stony Brook University  ...   \n",
      "5773  Brock University|Dr. Peter C. Fritz, Periodont...  ...   \n",
      "5774  University of Cambridge|Cambridgeshire and Pet...  ...   \n",
      "5775  Michigan State University|Arthritis Foundation...  ...   \n",
      "5776  University of Pittsburgh|U.S. National Science...  ...   \n",
      "5777                               University of Zurich  ...   \n",
      "5778  University of Wisconsin, Madison|National Inst...  ...   \n",
      "5779  ARCIM Institute Academic Research in Complemen...  ...   \n",
      "5780         University of Illinois at Urbana-Champaign  ...   \n",
      "5781  Mental Health Services in the Capital Region, ...  ...   \n",
      "5782  The University of Texas Health Science Center ...  ...   \n",
      "\n",
      "                                              Other IDs         Start Date  \\\n",
      "5768                                      S-2020-0101-a   February 5, 2021   \n",
      "5769                                      CHLA-19-00062      March 1, 2021   \n",
      "5770                   1U19MH109988 D18019|1U19MH109988  February 13, 2018   \n",
      "5771                                              SP019   November 6, 2020   \n",
      "5772                                      IRB2019-00382  February 15, 2021   \n",
      "5773                                             20-070  November 25, 2020   \n",
      "5774                  M00915|RNAG-521|224101|19/EE/0352   October 30, 2020   \n",
      "5775                                      STUDY00003882     August 4, 2020   \n",
      "5776                                      STUDY19050258   October 26, 2020   \n",
      "5777                               BASEC-2017-01175-rct    August 25, 2020   \n",
      "5778  2019-0337|R01AA024150|A532007|SMPH/FAMILY MED/...     March 23, 2020   \n",
      "5779                                             PKA-03         April 2021   \n",
      "5780                                              21584  February 10, 2021   \n",
      "5781                         MHSCRDenmark, F-61502-03-1     March 22, 2021   \n",
      "5782                                       HSC20200520H      February 2021   \n",
      "\n",
      "     Primary Completion Date    Completion Date       First Posted  \\\n",
      "5768           December 2022      December 2022   February 2, 2021   \n",
      "5769          March 30, 2022  December 30, 2022   December 9, 2019   \n",
      "5770              April 2021         April 2021    January 8, 2018   \n",
      "5771      September 30, 2022      June 30, 2025     March 10, 2020   \n",
      "5772               July 2024        August 2024   October 29, 2020   \n",
      "5773             August 2021      December 2021  November 20, 2020   \n",
      "5774       November 30, 2022  November 30, 2022  November 27, 2019   \n",
      "5775           December 2021      December 2021      April 6, 2020   \n",
      "5776         August 31, 2022  December 31, 2022   October 19, 2020   \n",
      "5777               June 2022          June 2022    October 5, 2020   \n",
      "5778             August 2022         April 2023       July 8, 2019   \n",
      "5779           November 2024      December 2024  December 23, 2020   \n",
      "5780               July 2021          July 2021   February 5, 2021   \n",
      "5781            May 31, 2022  December 31, 2022     March 18, 2021   \n",
      "5782           February 2024      February 2025  December 22, 2020   \n",
      "\n",
      "     Results First Posted Last Update Posted  \\\n",
      "5768                  NaN   February 9, 2021   \n",
      "5769                  NaN      March 1, 2021   \n",
      "5770                  NaN  December 19, 2020   \n",
      "5771                  NaN     April 14, 2021   \n",
      "5772                  NaN     March 17, 2021   \n",
      "5773                  NaN   December 4, 2020   \n",
      "5774                  NaN   November 3, 2020   \n",
      "5775                  NaN      March 9, 2021   \n",
      "5776                  NaN  November 10, 2020   \n",
      "5777                  NaN    October 5, 2020   \n",
      "5778                  NaN      April 2, 2021   \n",
      "5779                  NaN  February 10, 2021   \n",
      "5780                  NaN  February 24, 2021   \n",
      "5781                  NaN      April 1, 2021   \n",
      "5782                  NaN  December 22, 2020   \n",
      "\n",
      "                                              Locations  \\\n",
      "5768                  Kolding Sygehus, Kolding, Denmark   \n",
      "5769  Children's Hospital Los Angeles, Los Angeles, ...   \n",
      "5770  Salud de Tundama, Duitama, Boyaca, Colombia|Em...   \n",
      "5771  High Risk Pregnancy Center, Las Vegas, Nevada,...   \n",
      "5772  Stony Brook University, Stony Brook, New York,...   \n",
      "5773  Dr. Peter C. Fritz, Periodontal Wellness & Imp...   \n",
      "5774  Cameo Early Intervention, Cambridge, Cambridge...   \n",
      "5775  Michigan State University, Grand Rapids, Michi...   \n",
      "5776  University of Pittsburgh, Pittsburgh, Pennsylv...   \n",
      "5777  Klinik für Konsiliarpsychiatrie und Psychosoma...   \n",
      "5778  UW Health at the American Center, Madison, Wis...   \n",
      "5779  Die Filderklinik, Filderstadt, Baden-Württembe...   \n",
      "5780  University of Illinois at Urbana-Champaign, Ur...   \n",
      "5781  Mental Health Services in the Capital Region, ...   \n",
      "5782  Uniformed Services University for the Health S...   \n",
      "\n",
      "                                        Study Documents  \\\n",
      "5768                                                NaN   \n",
      "5769                                                NaN   \n",
      "5770  \"Informed Consent Form: Patient Informed Conse...   \n",
      "5771                                                NaN   \n",
      "5772                                                NaN   \n",
      "5773                                                NaN   \n",
      "5774                                                NaN   \n",
      "5775                                                NaN   \n",
      "5776                                                NaN   \n",
      "5777                                                NaN   \n",
      "5778                                                NaN   \n",
      "5779                                                NaN   \n",
      "5780                                                NaN   \n",
      "5781                                                NaN   \n",
      "5782                                                NaN   \n",
      "\n",
      "                                              URL  \n",
      "5768  https://ClinicalTrials.gov/show/NCT04734795  \n",
      "5769  https://ClinicalTrials.gov/show/NCT04190368  \n",
      "5770  https://ClinicalTrials.gov/show/NCT03392883  \n",
      "5771  https://ClinicalTrials.gov/show/NCT04301518  \n",
      "5772  https://ClinicalTrials.gov/show/NCT04607902  \n",
      "5773  https://ClinicalTrials.gov/show/NCT04639661  \n",
      "5774  https://ClinicalTrials.gov/show/NCT04180709  \n",
      "5775  https://ClinicalTrials.gov/show/NCT04335643  \n",
      "5776  https://ClinicalTrials.gov/show/NCT04589377  \n",
      "5777  https://ClinicalTrials.gov/show/NCT04574466  \n",
      "5778  https://ClinicalTrials.gov/show/NCT04011644  \n",
      "5779  https://ClinicalTrials.gov/show/NCT04681339  \n",
      "5780  https://ClinicalTrials.gov/show/NCT04740229  \n",
      "5781  https://ClinicalTrials.gov/show/NCT04804917  \n",
      "5782  https://ClinicalTrials.gov/show/NCT04680000  \n",
      "\n",
      "[15 rows x 27 columns]\n"
     ]
    }
   ],
   "source": [
    "print(\"Last 15 rows:\", df.tail(15))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "71994f54",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First 30 rows:       Rank   NCT Number                                              Title  \\\n",
      "5753  5754  NCT04646291  Retrospective Study on the Use, Efficiency, an...   \n",
      "5754  5755  NCT04735406                        The MS-LINK™ Outcomes Study   \n",
      "5755  5756  NCT02495753  Vaginal Cleansing Before Cesarean Delivery to ...   \n",
      "5756  5757  NCT03465280  Airway Intervention Registry (AIR): Recurrent ...   \n",
      "5757  5758  NCT04576377  Dynamics of the Immune Responses to Repeat Inf...   \n",
      "5758  5759  NCT04713735  Impact of Lactoferrin, a Dietary Supplement, v...   \n",
      "5759  5760  NCT03873467  Group Lifestyle Balance™ for Individuals With ...   \n",
      "5760  5761  NCT03594734  Group Lifestyle Balance™ for Individuals With ...   \n",
      "5761  5762  NCT03728257  Lung Transplant G0 (LTGO): Improving Self-Mana...   \n",
      "5762  5763  NCT04782518       Personalizing Exercise for Parkinson Disease   \n",
      "5763  5764  NCT04783454  Preventing Neck & Back Pain in Teleworking Off...   \n",
      "5764  5765  NCT03823469  Evaluating the Impact of a Culinary Coaching T...   \n",
      "5765  5766  NCT04429061  Reaching 90 90 90 in Adolescents in Zambia: Us...   \n",
      "5766  5767  NCT04184895  A Study to Assess the Safety, Tolerability and...   \n",
      "5767  5768  NCT03922334                        Navigating New Motherhood 2   \n",
      "5768  5769  NCT04734795  The Prevalence of Dysfunctional Breathing in C...   \n",
      "5769  5770  NCT04190368  Team Clinic: Virtual Expansion of an Innovativ...   \n",
      "5770  5771  NCT03392883  Scaling Up Science-based Mental Health Interve...   \n",
      "5771  5772  NCT04301518  Prematurity Risk Assessment Combined With Clin...   \n",
      "5772  5773  NCT04607902  Harnessing Network Science to Personalize Scal...   \n",
      "5773  5774  NCT04639661  Predictors of Periodontal Outcomes Post-sanati...   \n",
      "5774  5775  NCT04180709  CBT to Reduce Insomnia and Improve Social Reco...   \n",
      "5775  5776  NCT04335643  Telehealth CBT for Adolescents and Young Adult...   \n",
      "5776  5777  NCT04589377  Mindfulness to Mitigate Psychological Threat a...   \n",
      "5777  5778  NCT04574466  Scaling-up Psychological Interventions With Sy...   \n",
      "5778  5779  NCT04011644  Mobile Health for Alcohol Use Disorders in Cli...   \n",
      "5779  5780  NCT04681339  Antibiotic Prescription in Children Hospitaliz...   \n",
      "5780  5781  NCT04740229  Moderate-intensity Flow-based Yoga Effects on ...   \n",
      "5781  5782  NCT04804917           3-year Follow-up of the Mind My Mind RCT   \n",
      "5782  5783  NCT04680000  Chronic Pain Management In Primary Care Using ...   \n",
      "\n",
      "           Acronym                   Status         Study Results  \\\n",
      "5753           NaN                Completed  No Results Available   \n",
      "5754           NaN       Not yet recruiting  No Results Available   \n",
      "5755           NaN   Active, not recruiting  No Results Available   \n",
      "5756       AIR:RRP               Recruiting  No Results Available   \n",
      "5757         DRIVE               Recruiting  No Results Available   \n",
      "5758           NaN               Recruiting  No Results Available   \n",
      "5759       GLB-CVA   Active, not recruiting  No Results Available   \n",
      "5760           NaN   Active, not recruiting  No Results Available   \n",
      "5761          LTGO               Recruiting  No Results Available   \n",
      "5762           PEP               Recruiting  No Results Available   \n",
      "5763  Prevent@HOME               Recruiting  No Results Available   \n",
      "5764           NaN               Recruiting  No Results Available   \n",
      "5765        SKILLZ               Recruiting  No Results Available   \n",
      "5766           NaN               Recruiting  No Results Available   \n",
      "5767          NNM2               Recruiting  No Results Available   \n",
      "5768           NaN               Recruiting  No Results Available   \n",
      "5769           NaN       Not yet recruiting  No Results Available   \n",
      "5770         DIADA   Active, not recruiting  No Results Available   \n",
      "5771         PRIME               Recruiting  No Results Available   \n",
      "5772           NaN               Recruiting  No Results Available   \n",
      "5773           NaN  Enrolling by invitation  No Results Available   \n",
      "5774         CRISP               Recruiting  No Results Available   \n",
      "5775          cSLE               Recruiting  No Results Available   \n",
      "5776           NaN               Recruiting  No Results Available   \n",
      "5777           NaN               Recruiting  No Results Available   \n",
      "5778           NaN               Recruiting  No Results Available   \n",
      "5779           NaN       Not yet recruiting  No Results Available   \n",
      "5780           NaN               Recruiting  No Results Available   \n",
      "5781  MindMyMindFU               Recruiting  No Results Available   \n",
      "5782           NaN       Not yet recruiting  No Results Available   \n",
      "\n",
      "                                             Conditions  \\\n",
      "5753  Infertility|Infertility, Female|Infertility, Male   \n",
      "5754                                 Multiple Sclerosis   \n",
      "5755                    Complications; Cesarean Section   \n",
      "5756  Recurrent Respiratory Papillomatosis|Human Pap...   \n",
      "5757                                   Influenza, Human   \n",
      "5758                                      Immune Health   \n",
      "5759                                             Stroke   \n",
      "5760                             Traumatic Brain Injury   \n",
      "5761                      Exercise|Lung Transplantation   \n",
      "5762                                  Parkinson Disease   \n",
      "5763                  Neck Pain|Low Back Pain|Back Pain   \n",
      "5764                                 Overweight|Obesity   \n",
      "5765  HIV Infections|Pregnancy Related|STI|Mental He...   \n",
      "5766                       Allergic to House Dust Mites   \n",
      "5767  Postpartum Health|Breastfeeding|Contraception|...   \n",
      "5768  Dysfunctional Breathing|Asthma in Children|Asthma   \n",
      "5769                                    Type 1 Diabetes   \n",
      "5770                 Depression|Problematic Alcohol Use   \n",
      "5771                        Preterm Labor|Preterm Birth   \n",
      "5772                                         Depression   \n",
      "5773            Periodontal Diseases|Periodontal Pocket   \n",
      "5774                Psychotic Disorders|Psychosis|Sleep   \n",
      "5775                       Systemic Lupus Erythematosus   \n",
      "5776                                             Stress   \n",
      "5777  Distress|PTSD|Anxiety|Depression|Trauma|Functi...   \n",
      "5778                      Alcohol Drinking|Telemedicine   \n",
      "5779  Community Acquired Pneumonia in Children|Antib...   \n",
      "5780                               Stress|Psychological   \n",
      "5781  Emotional Problem|Anxiety Disorder of Childhoo...   \n",
      "5782                                       Chronic Pain   \n",
      "\n",
      "                                          Interventions  \\\n",
      "5753       Device: Insemination with the Mosie Baby Kit   \n",
      "5754                                                NaN   \n",
      "5755  Procedure: Vaginal Cleansing|Procedure: Abdomi...   \n",
      "5756  Procedure: Microdebrider|Procedure: Cold-steel...   \n",
      "5757                 Biological: FluBlok|Other: Placebo   \n",
      "5758  Dietary Supplement: Control: Placebo|Dietary S...   \n",
      "5759  Behavioral: Group Lifestyle Balance|Other: Usu...   \n",
      "5760  Behavioral: Group Lifestyle Balance™|Other: At...   \n",
      "5761  Behavioral: LTGO-Home Based Exercise|Behaviora...   \n",
      "5762                                                NaN   \n",
      "5763                            Other: Exercise program   \n",
      "5764  Behavioral: CCTP|Behavioral: Nutritional couns...   \n",
      "5765  Behavioral: SKILLZ-Girl Enhanced football curr...   \n",
      "5766            Biological: ASP2390|Biological: Placebo   \n",
      "5767             Behavioral: Patient Navigation Program   \n",
      "5768                                                NaN   \n",
      "5769       Other: Team Clinic Care|Other: Standard Care   \n",
      "5770                                  Behavioral: Laddr   \n",
      "5771            Other: Multimodal intervention strategy   \n",
      "5772  Behavioral: Supportive Therapy SSI|Behavioral:...   \n",
      "5773                                                NaN   \n",
      "5774                                    Device: Sleepio   \n",
      "5775                                  Behavioral: TEACH   \n",
      "5776                   Behavioral: Mindfulness Training   \n",
      "5777                Behavioral: Problem Management Plus   \n",
      "5778  Behavioral: A-CHESS self-monitored|Behavioral:...   \n",
      "5779  Other: Antibiotic treatment|Other: No antibiot...   \n",
      "5780                                   Behavioral: Yoga   \n",
      "5781                                                NaN   \n",
      "5782  Behavioral: Brief Cognitive Behavioral Therapy...   \n",
      "\n",
      "                                       Outcome Measures  \\\n",
      "5753  Pregnancy rate|Pregnancy rate differences betw...   \n",
      "5754  Patient-determined Disease Steps (PDDS) Scale ...   \n",
      "5755  Composite Postoperative Infectious Morbidity|A...   \n",
      "5756  Time interval between surgical interventions|R...   \n",
      "5757  Immune response to vaccination (4-fold rise in...   \n",
      "5758  Number of Respiratory Tract Infections|Severit...   \n",
      "5759  Change in weight|Physical Activity|Arm Circumf...   \n",
      "5760  Change in weight|Step Count|Waist and Arm Circ...   \n",
      "5761  Physical function-Walking, Change is being Ass...   \n",
      "5762  Association between exercise patterns and qual...   \n",
      "5763  neck pain|low back pain|behavioral change|phys...   \n",
      "5764  Body weight at 6 months|Body weight|Culinary a...   \n",
      "5765  Number of participants undergoing HIV testing ...   \n",
      "5766  Number of participants with Adverse Events (AE...   \n",
      "5767  Postpartum health at 4-12 weeks after delivery...   \n",
      "5768  Prevalence of children with dysfunctional brea...   \n",
      "5769  Hemoglobin A1C at Baseline|Hemoglobin A1C Prog...   \n",
      "5770  The Integrated Measure of Implementation Conte...   \n",
      "5771  Neonatal morbidity/mortality|Length of neonata...   \n",
      "5772  Change in adolescent depressive symptom severi...   \n",
      "5773  Probing depth|Bleeding on probing|Tooth Loss|O...   \n",
      "5774  Change from baseline Work and Social Adjustmen...   \n",
      "5775  Recruitment rates of the study|Retention rates...   \n",
      "5776  Mean differences in Psychological Threat measu...   \n",
      "5777  Change in psychological distress|Change in pos...   \n",
      "5778  Number of risky drinking days|Number of patien...   \n",
      "5779  Antibiotic treatment rates in hospitalized chi...   \n",
      "5780  Perceived Stress|Task switching paradigm|Digit...   \n",
      "5781  The child's impact of mental health problems r...   \n",
      "5782  Defense and Veterans Pain Rating Scale (DVPRS)...   \n",
      "\n",
      "                                  Sponsor/Collaborators  ...  \\\n",
      "5753                                         Mosie Baby  ...   \n",
      "5754  EMD Serono Research & Development Institute, I...  ...   \n",
      "5755           Washington University School of Medicine  ...   \n",
      "5756  Newcastle-upon-Tyne Hospitals NHS Trust|Alder ...  ...   \n",
      "5757  The University of Hong Kong|University of Chic...  ...   \n",
      "5758                             Mead Johnson Nutrition  ...   \n",
      "5759  Baylor Research Institute|National Institute o...  ...   \n",
      "5760  Baylor Research Institute|National Institute o...  ...   \n",
      "5761  University of Pittsburgh|National Institute of...  ...   \n",
      "5762  Washington University School of Medicine|Ameri...  ...   \n",
      "5763                                   University Ghent  ...   \n",
      "5764  Spaulding Rehabilitation Hospital|Sheba Medica...  ...   \n",
      "5765  University of Alabama at Birmingham|University...  ...   \n",
      "5766  Astellas Pharma Global Development, Inc.|Astel...  ...   \n",
      "5767  Northwestern University|Eunice Kennedy Shriver...  ...   \n",
      "5768  Kolding Sygehus|Odense Patient Data Explorativ...  ...   \n",
      "5769  Children's Hospital Los Angeles|University of ...  ...   \n",
      "5770  Dartmouth-Hitchcock Medical Center|Pontificia ...  ...   \n",
      "5771  Sera Prognostics, Inc.|High Risk Pregnancy Cen...  ...   \n",
      "5772                             Stony Brook University  ...   \n",
      "5773  Brock University|Dr. Peter C. Fritz, Periodont...  ...   \n",
      "5774  University of Cambridge|Cambridgeshire and Pet...  ...   \n",
      "5775  Michigan State University|Arthritis Foundation...  ...   \n",
      "5776  University of Pittsburgh|U.S. National Science...  ...   \n",
      "5777                               University of Zurich  ...   \n",
      "5778  University of Wisconsin, Madison|National Inst...  ...   \n",
      "5779  ARCIM Institute Academic Research in Complemen...  ...   \n",
      "5780         University of Illinois at Urbana-Champaign  ...   \n",
      "5781  Mental Health Services in the Capital Region, ...  ...   \n",
      "5782  The University of Texas Health Science Center ...  ...   \n",
      "\n",
      "                                              Other IDs         Start Date  \\\n",
      "5753                                          Mosie-001     August 5, 2020   \n",
      "5754                                      MS200077_0021      April 7, 2021   \n",
      "5755                                          201505127        August 2015   \n",
      "5756                                               8733      April 1, 2018   \n",
      "5757                                BJC033|1U01AI153700    October 5, 2020   \n",
      "5758                                             3393-1  December 28, 2020   \n",
      "5759                                      BSWRI 018-714       July 8, 2019   \n",
      "5760                                      BSWRI 017-482    January 8, 2019   \n",
      "5761      PRO18030496 (STUDY19020357)|1R01NR017196-01A1      April 1, 2019   \n",
      "5762                                          202002075  February 19, 2021   \n",
      "5763                                           BC-08635  November 12, 2020   \n",
      "5764                                        2018P002115       May 20, 2019   \n",
      "5765                                      IRB-300002251      March 1, 2020   \n",
      "5766                        2390-CL-0001|2018-004678-83    October 7, 2020   \n",
      "5767                          STU002096009|1R01HD098178   January 21, 2020   \n",
      "5768                                      S-2020-0101-a   February 5, 2021   \n",
      "5769                                      CHLA-19-00062      March 1, 2021   \n",
      "5770                   1U19MH109988 D18019|1U19MH109988  February 13, 2018   \n",
      "5771                                              SP019   November 6, 2020   \n",
      "5772                                      IRB2019-00382  February 15, 2021   \n",
      "5773                                             20-070  November 25, 2020   \n",
      "5774                  M00915|RNAG-521|224101|19/EE/0352   October 30, 2020   \n",
      "5775                                      STUDY00003882     August 4, 2020   \n",
      "5776                                      STUDY19050258   October 26, 2020   \n",
      "5777                               BASEC-2017-01175-rct    August 25, 2020   \n",
      "5778  2019-0337|R01AA024150|A532007|SMPH/FAMILY MED/...     March 23, 2020   \n",
      "5779                                             PKA-03         April 2021   \n",
      "5780                                              21584  February 10, 2021   \n",
      "5781                         MHSCRDenmark, F-61502-03-1     March 22, 2021   \n",
      "5782                                       HSC20200520H      February 2021   \n",
      "\n",
      "     Primary Completion Date     Completion Date       First Posted  \\\n",
      "5753         August 31, 2020   September 1, 2020  November 27, 2020   \n",
      "5754        January 31, 2024    January 31, 2024   February 3, 2021   \n",
      "5755            January 2021           June 2021      July 13, 2015   \n",
      "5756         August 31, 2021     August 31, 2021     March 14, 2018   \n",
      "5757       December 31, 2024   December 31, 2024    October 6, 2020   \n",
      "5758          April 30, 2021      April 30, 2021   January 19, 2021   \n",
      "5759           June 30, 2021  September 30, 2021     March 13, 2019   \n",
      "5760       December 30, 2021  September 30, 2022      July 20, 2018   \n",
      "5761          March 31, 2022      March 31, 2022   November 2, 2018   \n",
      "5762       February 18, 2024   February 18, 2025      March 4, 2021   \n",
      "5763       November 30, 2028   December 31, 2028      March 5, 2021   \n",
      "5764           February 2022       February 2022   January 30, 2019   \n",
      "5765          March 30, 2023       June 30, 2023      June 11, 2020   \n",
      "5766         August 31, 2026     August 31, 2026   December 4, 2019   \n",
      "5767            January 2024       December 2024     April 19, 2019   \n",
      "5768           December 2022       December 2022   February 2, 2021   \n",
      "5769          March 30, 2022   December 30, 2022   December 9, 2019   \n",
      "5770              April 2021          April 2021    January 8, 2018   \n",
      "5771      September 30, 2022       June 30, 2025     March 10, 2020   \n",
      "5772               July 2024         August 2024   October 29, 2020   \n",
      "5773             August 2021       December 2021  November 20, 2020   \n",
      "5774       November 30, 2022   November 30, 2022  November 27, 2019   \n",
      "5775           December 2021       December 2021      April 6, 2020   \n",
      "5776         August 31, 2022   December 31, 2022   October 19, 2020   \n",
      "5777               June 2022           June 2022    October 5, 2020   \n",
      "5778             August 2022          April 2023       July 8, 2019   \n",
      "5779           November 2024       December 2024  December 23, 2020   \n",
      "5780               July 2021           July 2021   February 5, 2021   \n",
      "5781            May 31, 2022   December 31, 2022     March 18, 2021   \n",
      "5782           February 2024       February 2025  December 22, 2020   \n",
      "\n",
      "     Results First Posted  Last Update Posted  \\\n",
      "5753                  NaN    December 1, 2020   \n",
      "5754                  NaN       March 5, 2021   \n",
      "5755                  NaN   February 11, 2021   \n",
      "5756                  NaN   September 9, 2020   \n",
      "5757                  NaN    November 5, 2020   \n",
      "5758                  NaN    January 19, 2021   \n",
      "5759                  NaN  September 23, 2020   \n",
      "5760                  NaN  September 23, 2020   \n",
      "5761                  NaN   February 11, 2021   \n",
      "5762                  NaN       March 4, 2021   \n",
      "5763                  NaN       March 5, 2021   \n",
      "5764                  NaN        May 22, 2020   \n",
      "5765                  NaN       June 11, 2020   \n",
      "5766                  NaN      March 23, 2021   \n",
      "5767                  NaN       March 3, 2021   \n",
      "5768                  NaN    February 9, 2021   \n",
      "5769                  NaN       March 1, 2021   \n",
      "5770                  NaN   December 19, 2020   \n",
      "5771                  NaN      April 14, 2021   \n",
      "5772                  NaN      March 17, 2021   \n",
      "5773                  NaN    December 4, 2020   \n",
      "5774                  NaN    November 3, 2020   \n",
      "5775                  NaN       March 9, 2021   \n",
      "5776                  NaN   November 10, 2020   \n",
      "5777                  NaN     October 5, 2020   \n",
      "5778                  NaN       April 2, 2021   \n",
      "5779                  NaN   February 10, 2021   \n",
      "5780                  NaN   February 24, 2021   \n",
      "5781                  NaN       April 1, 2021   \n",
      "5782                  NaN   December 22, 2020   \n",
      "\n",
      "                                              Locations  \\\n",
      "5753  Virtual Study - Online Data Collection, Austin...   \n",
      "5754  Georgetown University, Washington, District of...   \n",
      "5755  Barnes Jewish Hospital, Saint Louis, Missouri,...   \n",
      "5756  NHS Grampian, Aberdeen, United Kingdom|Betsi C...   \n",
      "5757  The University of Hong Kong, Hong Kong, Hong Kong   \n",
      "5758  VA Greater Los Angeles Healthcare System, Los ...   \n",
      "5759  Baylor Scott & White Institute for Rehabilitat...   \n",
      "5760  Baylor Scott & White Institute for Rehabilitat...   \n",
      "5761  University of Pittsburgh, School of Nursing, P...   \n",
      "5762  Washington University School of Medicine, Sain...   \n",
      "5763    Ghent University, Ghent, East Flanders, Belgium   \n",
      "5764  Spaulding Rehabilitation hospitle, Charlestown...   \n",
      "5765  Centre of Infectious Disease Research Zambia, ...   \n",
      "5766  Site DE49001, Berlin, Germany|Site DE49002, Ha...   \n",
      "5767  Northwestern Memorial Hospital, Chicago, Illin...   \n",
      "5768                  Kolding Sygehus, Kolding, Denmark   \n",
      "5769  Children's Hospital Los Angeles, Los Angeles, ...   \n",
      "5770  Salud de Tundama, Duitama, Boyaca, Colombia|Em...   \n",
      "5771  High Risk Pregnancy Center, Las Vegas, Nevada,...   \n",
      "5772  Stony Brook University, Stony Brook, New York,...   \n",
      "5773  Dr. Peter C. Fritz, Periodontal Wellness & Imp...   \n",
      "5774  Cameo Early Intervention, Cambridge, Cambridge...   \n",
      "5775  Michigan State University, Grand Rapids, Michi...   \n",
      "5776  University of Pittsburgh, Pittsburgh, Pennsylv...   \n",
      "5777  Klinik für Konsiliarpsychiatrie und Psychosoma...   \n",
      "5778  UW Health at the American Center, Madison, Wis...   \n",
      "5779  Die Filderklinik, Filderstadt, Baden-Württembe...   \n",
      "5780  University of Illinois at Urbana-Champaign, Ur...   \n",
      "5781  Mental Health Services in the Capital Region, ...   \n",
      "5782  Uniformed Services University for the Health S...   \n",
      "\n",
      "                                        Study Documents  \\\n",
      "5753                                                NaN   \n",
      "5754                                                NaN   \n",
      "5755                                                NaN   \n",
      "5756                                                NaN   \n",
      "5757                                                NaN   \n",
      "5758                                                NaN   \n",
      "5759                                                NaN   \n",
      "5760                                                NaN   \n",
      "5761                                                NaN   \n",
      "5762                                                NaN   \n",
      "5763                                                NaN   \n",
      "5764                                                NaN   \n",
      "5765  \"Study Protocol and Statistical Analysis Plan\"...   \n",
      "5766                                                NaN   \n",
      "5767                                                NaN   \n",
      "5768                                                NaN   \n",
      "5769                                                NaN   \n",
      "5770  \"Informed Consent Form: Patient Informed Conse...   \n",
      "5771                                                NaN   \n",
      "5772                                                NaN   \n",
      "5773                                                NaN   \n",
      "5774                                                NaN   \n",
      "5775                                                NaN   \n",
      "5776                                                NaN   \n",
      "5777                                                NaN   \n",
      "5778                                                NaN   \n",
      "5779                                                NaN   \n",
      "5780                                                NaN   \n",
      "5781                                                NaN   \n",
      "5782                                                NaN   \n",
      "\n",
      "                                              URL  \n",
      "5753  https://ClinicalTrials.gov/show/NCT04646291  \n",
      "5754  https://ClinicalTrials.gov/show/NCT04735406  \n",
      "5755  https://ClinicalTrials.gov/show/NCT02495753  \n",
      "5756  https://ClinicalTrials.gov/show/NCT03465280  \n",
      "5757  https://ClinicalTrials.gov/show/NCT04576377  \n",
      "5758  https://ClinicalTrials.gov/show/NCT04713735  \n",
      "5759  https://ClinicalTrials.gov/show/NCT03873467  \n",
      "5760  https://ClinicalTrials.gov/show/NCT03594734  \n",
      "5761  https://ClinicalTrials.gov/show/NCT03728257  \n",
      "5762  https://ClinicalTrials.gov/show/NCT04782518  \n",
      "5763  https://ClinicalTrials.gov/show/NCT04783454  \n",
      "5764  https://ClinicalTrials.gov/show/NCT03823469  \n",
      "5765  https://ClinicalTrials.gov/show/NCT04429061  \n",
      "5766  https://ClinicalTrials.gov/show/NCT04184895  \n",
      "5767  https://ClinicalTrials.gov/show/NCT03922334  \n",
      "5768  https://ClinicalTrials.gov/show/NCT04734795  \n",
      "5769  https://ClinicalTrials.gov/show/NCT04190368  \n",
      "5770  https://ClinicalTrials.gov/show/NCT03392883  \n",
      "5771  https://ClinicalTrials.gov/show/NCT04301518  \n",
      "5772  https://ClinicalTrials.gov/show/NCT04607902  \n",
      "5773  https://ClinicalTrials.gov/show/NCT04639661  \n",
      "5774  https://ClinicalTrials.gov/show/NCT04180709  \n",
      "5775  https://ClinicalTrials.gov/show/NCT04335643  \n",
      "5776  https://ClinicalTrials.gov/show/NCT04589377  \n",
      "5777  https://ClinicalTrials.gov/show/NCT04574466  \n",
      "5778  https://ClinicalTrials.gov/show/NCT04011644  \n",
      "5779  https://ClinicalTrials.gov/show/NCT04681339  \n",
      "5780  https://ClinicalTrials.gov/show/NCT04740229  \n",
      "5781  https://ClinicalTrials.gov/show/NCT04804917  \n",
      "5782  https://ClinicalTrials.gov/show/NCT04680000  \n",
      "\n",
      "[30 rows x 27 columns]\n"
     ]
    }
   ],
   "source": [
    "print(\"Last 30 rows:\", df.tail(30))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3081d7f4",
   "metadata": {},
   "source": [
    "#### Q2. Extract and document key insights by exploring column data types with `info()` and `describe()`, and write down your insights in a markdown cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "44bca453",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5783 entries, 0 to 5782\n",
      "Data columns (total 27 columns):\n",
      " #   Column                   Non-Null Count  Dtype  \n",
      "---  ------                   --------------  -----  \n",
      " 0   Rank                     5783 non-null   int64  \n",
      " 1   NCT Number               5783 non-null   object \n",
      " 2   Title                    5783 non-null   object \n",
      " 3   Acronym                  2480 non-null   object \n",
      " 4   Status                   5783 non-null   object \n",
      " 5   Study Results            5783 non-null   object \n",
      " 6   Conditions               5783 non-null   object \n",
      " 7   Interventions            4897 non-null   object \n",
      " 8   Outcome Measures         5748 non-null   object \n",
      " 9   Sponsor/Collaborators    5783 non-null   object \n",
      " 10  Gender                   5773 non-null   object \n",
      " 11  Age                      5783 non-null   object \n",
      " 12  Phases                   3322 non-null   object \n",
      " 13  Enrollment               5749 non-null   float64\n",
      " 14  Funded Bys               5783 non-null   object \n",
      " 15  Study Type               5783 non-null   object \n",
      " 16  Study Designs            5748 non-null   object \n",
      " 17  Other IDs                5782 non-null   object \n",
      " 18  Start Date               5749 non-null   object \n",
      " 19  Primary Completion Date  5747 non-null   object \n",
      " 20  Completion Date          5747 non-null   object \n",
      " 21  First Posted             5783 non-null   object \n",
      " 22  Results First Posted     36 non-null     object \n",
      " 23  Last Update Posted       5783 non-null   object \n",
      " 24  Locations                5197 non-null   object \n",
      " 25  Study Documents          182 non-null    object \n",
      " 26  URL                      5783 non-null   object \n",
      "dtypes: float64(1), int64(1), object(25)\n",
      "memory usage: 1.2+ MB\n"
     ]
    }
   ],
   "source": [
    "column_info = df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "f134ca37",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Rank</th>\n",
       "      <th>Enrollment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>5783.000000</td>\n",
       "      <td>5.749000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2892.000000</td>\n",
       "      <td>1.831949e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1669.552635</td>\n",
       "      <td>4.045437e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1446.500000</td>\n",
       "      <td>6.000000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2892.000000</td>\n",
       "      <td>1.700000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>4337.500000</td>\n",
       "      <td>5.600000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>5783.000000</td>\n",
       "      <td>2.000000e+07</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Rank    Enrollment\n",
       "count  5783.000000  5.749000e+03\n",
       "mean   2892.000000  1.831949e+04\n",
       "std    1669.552635  4.045437e+05\n",
       "min       1.000000  0.000000e+00\n",
       "25%    1446.500000  6.000000e+01\n",
       "50%    2892.000000  1.700000e+02\n",
       "75%    4337.500000  5.600000e+02\n",
       "max    5783.000000  2.000000e+07"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "descriptive_stats = df.describe()\n",
    "descriptive_stats"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4cdc469b",
   "metadata": {},
   "source": [
    "# Dataset Insights"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b23ecb5",
   "metadata": {},
   "source": [
    "- The dataset contains 5783 entries with 27 columns.\n",
    "- Data types include one float64, one int64, and 25 object types.\n",
    "- Rank and Enrollment are the only numerical columns; Rank has no missing values, while Enrollment is missing for 34 entries.\n",
    "- Acronym, Interventions, Phases, Locations, and Study Documents have a significant number of missing values.\n",
    "- Results First Posted has only 36 non-null entries, suggesting that most studies do not have results posted.\n",
    "- The average enrollment size is 18,319, which is skewed by a few large studies, as indicated by a large standard deviation of about 404,544.\n",
    "- The median enrollment size is 170, meaning that half of the studies have enrollments less than or equal to 170, which is much lower than the mean.\n",
    "- The maximum enrollment size is 20,000,000, which is an outlier compared to the rest of the data."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b490f2d",
   "metadata": {},
   "source": [
    "# Q3. Loc and iloc"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "001b8678",
   "metadata": {},
   "source": [
    "- Select all columns for the first clinical trial in the dataset.\n",
    "- Retrieve the Title and Status of the clinical trial with the NCT Number 'NCT04595136'.\n",
    "- Get the Sponsor/Collaborators and Start Date for clinical trials that are Recruiting.\n",
    "- Select the first 5 rows and columns Title, Conditions, and Outcome Measures.\n",
    "- Find the Completion Date and URL for the last 3 clinical trials in the dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "f3e5a9f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Rank                                                                       1\n",
       "NCT Number                                                       NCT04785898\n",
       "Title                      Diagnostic Performance of the ID Now™ COVID-19...\n",
       "Acronym                                                          COVID-IDNow\n",
       "Status                                                Active, not recruiting\n",
       "Study Results                                           No Results Available\n",
       "Conditions                                                           Covid19\n",
       "Interventions               Diagnostic Test: ID Now™ COVID-19 Screening Test\n",
       "Outcome Measures           Evaluate the diagnostic performance of the ID ...\n",
       "Sponsor/Collaborators                  Groupe Hospitalier Paris Saint Joseph\n",
       "Gender                                                                   All\n",
       "Age                                18 Years and older   (Adult, Older Adult)\n",
       "Phases                                                        Not Applicable\n",
       "Enrollment                                                            1000.0\n",
       "Funded Bys                                                             Other\n",
       "Study Type                                                    Interventional\n",
       "Study Designs              Allocation: N/A|Intervention Model: Single Gro...\n",
       "Other IDs                                                        COVID-IDNow\n",
       "Start Date                                                  November 9, 2020\n",
       "Primary Completion Date                                    December 22, 2020\n",
       "Completion Date                                               April 30, 2021\n",
       "First Posted                                                   March 8, 2021\n",
       "Results First Posted                                                     NaN\n",
       "Last Update Posted                                             March 8, 2021\n",
       "Locations                  Groupe Hospitalier Paris Saint-Joseph, Paris, ...\n",
       "Study Documents                                                          NaN\n",
       "URL                              https://ClinicalTrials.gov/show/NCT04785898\n",
       "Name: 0, dtype: object"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "first_clinical_trial = df.iloc[0]\n",
    "first_clinical_trial"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "2175b33a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Title</th>\n",
       "      <th>Status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Study to Evaluate the Efficacy of COVID19-0001...</td>\n",
       "      <td>Not yet recruiting</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               Title              Status\n",
       "1  Study to Evaluate the Efficacy of COVID19-0001...  Not yet recruiting"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "title_status = df.loc[df['NCT Number'] == 'NCT04595136', ['Title', 'Status']]\n",
    "title_status"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "5663d0e7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sponsor/Collaborators</th>\n",
       "      <th>Start Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>University of Milano Bicocca</td>\n",
       "      <td>May 7, 2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Centre Hospitalier Régional d'Orléans|Centre d...</td>\n",
       "      <td>May 5, 2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Fundacion Arturo Lopez Perez|Confederación de ...</td>\n",
       "      <td>April 7, 2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>The Christ Hospital</td>\n",
       "      <td>April 28, 2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>University of Milano Bicocca</td>\n",
       "      <td>April 16, 2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5776</th>\n",
       "      <td>University of Pittsburgh|U.S. National Science...</td>\n",
       "      <td>October 26, 2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5777</th>\n",
       "      <td>University of Zurich</td>\n",
       "      <td>August 25, 2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5778</th>\n",
       "      <td>University of Wisconsin, Madison|National Inst...</td>\n",
       "      <td>March 23, 2020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5780</th>\n",
       "      <td>University of Illinois at Urbana-Champaign</td>\n",
       "      <td>February 10, 2021</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5781</th>\n",
       "      <td>Mental Health Services in the Capital Region, ...</td>\n",
       "      <td>March 22, 2021</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2805 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  Sponsor/Collaborators         Start Date\n",
       "2                          University of Milano Bicocca        May 7, 2020\n",
       "4     Centre Hospitalier Régional d'Orléans|Centre d...        May 5, 2020\n",
       "9     Fundacion Arturo Lopez Perez|Confederación de ...      April 7, 2020\n",
       "10                                  The Christ Hospital     April 28, 2020\n",
       "11                         University of Milano Bicocca     April 16, 2020\n",
       "...                                                 ...                ...\n",
       "5776  University of Pittsburgh|U.S. National Science...   October 26, 2020\n",
       "5777                               University of Zurich    August 25, 2020\n",
       "5778  University of Wisconsin, Madison|National Inst...     March 23, 2020\n",
       "5780         University of Illinois at Urbana-Champaign  February 10, 2021\n",
       "5781  Mental Health Services in the Capital Region, ...     March 22, 2021\n",
       "\n",
       "[2805 rows x 2 columns]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recruiting_info = df.loc[df['Status'] == 'Recruiting', ['Sponsor/Collaborators', 'Start Date']]\n",
    "recruiting_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "e513680b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Title</th>\n",
       "      <th>Conditions</th>\n",
       "      <th>Outcome Measures</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Diagnostic Performance of the ID Now™ COVID-19...</td>\n",
       "      <td>Covid19</td>\n",
       "      <td>Evaluate the diagnostic performance of the ID ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Study to Evaluate the Efficacy of COVID19-0001...</td>\n",
       "      <td>SARS-CoV-2 Infection</td>\n",
       "      <td>Change on viral load results from baseline aft...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Lung CT Scan Analysis of SARS-CoV2 Induced Lun...</td>\n",
       "      <td>covid19</td>\n",
       "      <td>A qualitative analysis of parenchymal lung dam...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>The Role of a Private Hospital in Hong Kong Am...</td>\n",
       "      <td>COVID</td>\n",
       "      <td>Proportion of asymptomatic subjects|Proportion...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Maternal-foetal Transmission of SARS-Cov-2</td>\n",
       "      <td>Maternal Fetal Infection Transmission|COVID-19...</td>\n",
       "      <td>COVID-19 by positive PCR in cord blood and / o...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               Title  \\\n",
       "0  Diagnostic Performance of the ID Now™ COVID-19...   \n",
       "1  Study to Evaluate the Efficacy of COVID19-0001...   \n",
       "2  Lung CT Scan Analysis of SARS-CoV2 Induced Lun...   \n",
       "3  The Role of a Private Hospital in Hong Kong Am...   \n",
       "4         Maternal-foetal Transmission of SARS-Cov-2   \n",
       "\n",
       "                                          Conditions  \\\n",
       "0                                            Covid19   \n",
       "1                               SARS-CoV-2 Infection   \n",
       "2                                            covid19   \n",
       "3                                              COVID   \n",
       "4  Maternal Fetal Infection Transmission|COVID-19...   \n",
       "\n",
       "                                    Outcome Measures  \n",
       "0  Evaluate the diagnostic performance of the ID ...  \n",
       "1  Change on viral load results from baseline aft...  \n",
       "2  A qualitative analysis of parenchymal lung dam...  \n",
       "3  Proportion of asymptomatic subjects|Proportion...  \n",
       "4  COVID-19 by positive PCR in cord blood and / o...  "
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "first_5_trials = df.loc[0:4, ['Title', 'Conditions', 'Outcome Measures']]\n",
    "first_5_trials"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "4e591b9e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Completion Date</th>\n",
       "      <th>First Posted</th>\n",
       "      <th>Results First Posted</th>\n",
       "      <th>Last Update Posted</th>\n",
       "      <th>Locations</th>\n",
       "      <th>Study Documents</th>\n",
       "      <th>URL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5780</th>\n",
       "      <td>July 2021</td>\n",
       "      <td>February 5, 2021</td>\n",
       "      <td>NaN</td>\n",
       "      <td>February 24, 2021</td>\n",
       "      <td>University of Illinois at Urbana-Champaign, Ur...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://ClinicalTrials.gov/show/NCT04740229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5781</th>\n",
       "      <td>December 31, 2022</td>\n",
       "      <td>March 18, 2021</td>\n",
       "      <td>NaN</td>\n",
       "      <td>April 1, 2021</td>\n",
       "      <td>Mental Health Services in the Capital Region, ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://ClinicalTrials.gov/show/NCT04804917</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5782</th>\n",
       "      <td>February 2025</td>\n",
       "      <td>December 22, 2020</td>\n",
       "      <td>NaN</td>\n",
       "      <td>December 22, 2020</td>\n",
       "      <td>Uniformed Services University for the Health S...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>https://ClinicalTrials.gov/show/NCT04680000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Completion Date       First Posted Results First Posted  \\\n",
       "5780          July 2021   February 5, 2021                  NaN   \n",
       "5781  December 31, 2022     March 18, 2021                  NaN   \n",
       "5782      February 2025  December 22, 2020                  NaN   \n",
       "\n",
       "     Last Update Posted                                          Locations  \\\n",
       "5780  February 24, 2021  University of Illinois at Urbana-Champaign, Ur...   \n",
       "5781      April 1, 2021  Mental Health Services in the Capital Region, ...   \n",
       "5782  December 22, 2020  Uniformed Services University for the Health S...   \n",
       "\n",
       "     Study Documents                                          URL  \n",
       "5780             NaN  https://ClinicalTrials.gov/show/NCT04740229  \n",
       "5781             NaN  https://ClinicalTrials.gov/show/NCT04804917  \n",
       "5782             NaN  https://ClinicalTrials.gov/show/NCT04680000  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "last_3_trials = df.iloc[-3:, df.columns.get_loc('Completion Date'):df.columns.get_loc('URL')+1]\n",
    "last_3_trials"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5f9c3278",
   "metadata": {},
   "source": [
    "#### Q4. Determine the missing values in the whole dataset and analyze missing values in each column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "54d4ade0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total missing values in the dataset: 18805\n"
     ]
    }
   ],
   "source": [
    "total_missing_values = df.isnull().sum().sum()\n",
    "print(f\"Total missing values in the dataset: {total_missing_values}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "7428b84c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Missing values in each column:\n",
      "Rank                          0\n",
      "NCT Number                    0\n",
      "Title                         0\n",
      "Acronym                    3303\n",
      "Status                        0\n",
      "Study Results                 0\n",
      "Conditions                    0\n",
      "Interventions               886\n",
      "Outcome Measures             35\n",
      "Sponsor/Collaborators         0\n",
      "Gender                       10\n",
      "Age                           0\n",
      "Phases                     2461\n",
      "Enrollment                   34\n",
      "Funded Bys                    0\n",
      "Study Type                    0\n",
      "Study Designs                35\n",
      "Other IDs                     1\n",
      "Start Date                   34\n",
      "Primary Completion Date      36\n",
      "Completion Date              36\n",
      "First Posted                  0\n",
      "Results First Posted       5747\n",
      "Last Update Posted            0\n",
      "Locations                   586\n",
      "Study Documents            5601\n",
      "URL                           0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "missing_values_per_column = df.isnull().sum()\n",
    "print(\"Missing values in each column:\")\n",
    "print(missing_values_per_column)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "51075ffa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Percentage of missing values in each column:\n",
      "Rank                        0.000000\n",
      "NCT Number                  0.000000\n",
      "Title                       0.000000\n",
      "Acronym                    57.115684\n",
      "Status                      0.000000\n",
      "Study Results               0.000000\n",
      "Conditions                  0.000000\n",
      "Interventions              15.320768\n",
      "Outcome Measures            0.605222\n",
      "Sponsor/Collaborators       0.000000\n",
      "Gender                      0.172921\n",
      "Age                         0.000000\n",
      "Phases                     42.555767\n",
      "Enrollment                  0.587930\n",
      "Funded Bys                  0.000000\n",
      "Study Type                  0.000000\n",
      "Study Designs               0.605222\n",
      "Other IDs                   0.017292\n",
      "Start Date                  0.587930\n",
      "Primary Completion Date     0.622514\n",
      "Completion Date             0.622514\n",
      "First Posted                0.000000\n",
      "Results First Posted       99.377486\n",
      "Last Update Posted          0.000000\n",
      "Locations                  10.133149\n",
      "Study Documents            96.852845\n",
      "URL                         0.000000\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "percentage_missing = (missing_values_per_column / len(df)) * 100\n",
    "print(\"Percentage of missing values in each column:\")\n",
    "print(percentage_missing)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8a8943a5",
   "metadata": {},
   "source": [
    "#### Q5. Calculate the sum of duplicate rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "cae78405",
   "metadata": {},
   "outputs": [],
   "source": [
    "duplicate_rows = df[df.duplicated()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "2a29a019",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sum of duplicate rows for numeric columns:\n",
      "Rank          0.0\n",
      "Enrollment    0.0\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "sum_of_duplicates = duplicate_rows.sum(numeric_only=True)\n",
    "sum_of_duplicates\n",
    "\n",
    "print(\"Sum of duplicate rows for numeric columns:\")\n",
    "print(sum_of_duplicates)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f33b43a2",
   "metadata": {},
   "source": [
    "## Q6. Solve the following questions using conditional statements"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1f8a1b35",
   "metadata": {},
   "source": [
    "- How many studies have an enrollment greater than a certain threshold?\n",
    "- How many clinical trials have 'No Results Available'?\n",
    "- How many clinical trials are in \"Completed\" status, and how many are in \"Recruiting\" status?\n",
    "- How many clinical trials are related to 'COVID-19'?\n",
    "- How many clinical trials started after January 1, 2020?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "1b517edb",
   "metadata": {},
   "outputs": [],
   "source": [
    "enrollment_threshold = 1000\n",
    "studies_above_threshold = df[df['Enrollment'] > enrollment_threshold].shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "ae474e6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "no_results_count = df[df['Study Results'] == 'No Results Available'].shape[0]\n",
    "completed_count = df[df['Status'] == 'Completed'].shape[0]\n",
    "recruiting_count = df[df['Status'] == 'Recruiting'].shape[0]\n",
    "covid_trials_count = df[df['Conditions'].str.contains('COVID-19', case=False, na=False)].shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "f03f11cf",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df['Start Date'] = pd.to_datetime(df['Start Date'], errors='coerce')\n",
    "threshold_date = pd.to_datetime('2020-01-01')\n",
    "\n",
    "trials_started_after_2020 = df[df['Start Date'] > threshold_date].shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "57a80afc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Studies with enrollment greater than 1000: 911\n",
      "Clinical trials with 'No Results Available': 5747\n",
      "Clinical trials in 'Completed' status: 1025\n",
      "Clinical trials in 'Recruiting' status: 2805\n",
      "Clinical trials related to 'COVID-19': 1872\n",
      "Clinical trials started after January 1, 2020: 5066\n"
     ]
    }
   ],
   "source": [
    "print(f\"Studies with enrollment greater than {enrollment_threshold}: {studies_above_threshold}\")\n",
    "print(f\"Clinical trials with 'No Results Available': {no_results_count}\")\n",
    "print(f\"Clinical trials in 'Completed' status: {completed_count}\")\n",
    "print(f\"Clinical trials in 'Recruiting' status: {recruiting_count}\")\n",
    "print(f\"Clinical trials related to 'COVID-19': {covid_trials_count}\")\n",
    "print(f\"Clinical trials started after January 1, 2020: {trials_started_after_2020}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}