--- a +++ b/Covid_Clinical_Trials_Analysis.ipynb @@ -0,0 +1,2438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bb78e666", + "metadata": {}, + "source": [ + "# Assignment: 01" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "947166c4", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e66ecaac", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"COVID clinical trials.csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "0fb72044", + "metadata": {}, + "source": [ + "#### Q1. Read Dataset and Explore the dataset by checking shape, columns, see the first/last 'n' rows using head/tail. (n= 5,15,30) " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "950e9b57", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of the dataset: (5783, 27)\n" + ] + } + ], + "source": [ + "print(\"Shape of the dataset:\", df.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "648b76fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Columns in the dataset: Index(['Rank', 'NCT Number', 'Title', 'Acronym', 'Status', 'Study Results',\n", + " 'Conditions', 'Interventions', 'Outcome Measures',\n", + " 'Sponsor/Collaborators', 'Gender', 'Age', 'Phases', 'Enrollment',\n", + " 'Funded Bys', 'Study Type', 'Study Designs', 'Other IDs', 'Start Date',\n", + " 'Primary Completion Date', 'Completion Date', 'First Posted',\n", + " 'Results First Posted', 'Last Update Posted', 'Locations',\n", + " 'Study Documents', 'URL'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(\"Columns in the dataset:\", df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "d87b8aeb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['All', 'Female', 'Male', nan], dtype=object)" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Gender'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "aa5ec1f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Gender\n", + "All 5567\n", + "Female 162\n", + "Male 44\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Gender'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "9da4780a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sum of duplicate rows for numeric columns:\n", + "Rank 0\n", + "NCT Number 0\n", + "Title 0\n", + "Acronym 0\n", + "Status 0\n", + "Study Results 0\n", + "Conditions 0\n", + "Interventions 0\n", + "Outcome Measures 0\n", + "Sponsor/Collaborators 0\n", + "Gender 0\n", + "Age 0\n", + "Phases 0\n", + "Enrollment 0.0\n", + "Funded Bys 0\n", + "Study Type 0\n", + "Study Designs 0\n", + "Other IDs 0\n", + "Start Date 0\n", + "Primary Completion Date 0\n", + "Completion Date 0\n", + "First Posted 0\n", + "Results First Posted 0\n", + "Last Update Posted 0\n", + "Locations 0\n", + "Study Documents 0\n", + "URL 0\n", + "dtype: object\n" + ] + } + ], + "source": [ + "print(\"Sum of duplicate rows for numeric columns:\")\n", + "print(sum_of_duplicates)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "652eff47", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First 5 rows: Rank NCT Number Title \\\n", + "0 1 NCT04785898 Diagnostic Performance of the ID Now™ COVID-19... \n", + "1 2 NCT04595136 Study to Evaluate the Efficacy of COVID19-0001... \n", + "2 3 NCT04395482 Lung CT Scan Analysis of SARS-CoV2 Induced Lun... \n", + "3 4 NCT04416061 The Role of a Private Hospital in Hong Kong Am... \n", + "4 5 NCT04395924 Maternal-foetal Transmission of SARS-Cov-2 \n", + "\n", + " Acronym Status Study Results \\\n", + "0 COVID-IDNow Active, not recruiting No Results Available \n", + "1 COVID-19 Not yet recruiting No Results Available \n", + "2 TAC-COVID19 Recruiting No Results Available \n", + "3 COVID-19 Active, not recruiting No Results Available \n", + "4 TMF-COVID-19 Recruiting No Results Available \n", + "\n", + " Conditions \\\n", + "0 Covid19 \n", + "1 SARS-CoV-2 Infection \n", + "2 covid19 \n", + "3 COVID \n", + "4 Maternal Fetal Infection Transmission|COVID-19... \n", + "\n", + " Interventions \\\n", + "0 Diagnostic Test: ID Now™ COVID-19 Screening Test \n", + "1 Drug: Drug COVID19-0001-USR|Drug: normal saline \n", + "2 Other: Lung CT scan analysis in COVID-19 patients \n", + "3 Diagnostic Test: COVID 19 Diagnostic Test \n", + "4 Diagnostic Test: Diagnosis of SARS-Cov2 by RT-... \n", + "\n", + " Outcome Measures \\\n", + "0 Evaluate the diagnostic performance of the ID ... \n", + "1 Change on viral load results from baseline aft... \n", + "2 A qualitative analysis of parenchymal lung dam... \n", + "3 Proportion of asymptomatic subjects|Proportion... \n", + "4 COVID-19 by positive PCR in cord blood and / o... \n", + "\n", + " Sponsor/Collaborators ... Other IDs \\\n", + "0 Groupe Hospitalier Paris Saint Joseph ... COVID-IDNow \n", + "1 United Medical Specialties ... COVID19-0001-USR \n", + "2 University of Milano Bicocca ... TAC-COVID19 \n", + "3 Hong Kong Sanatorium & Hospital ... RC-2020-08 \n", + "4 Centre Hospitalier Régional d'Orléans|Centre d... ... CHRO-2020-10 \n", + "\n", + " Start Date Primary Completion Date Completion Date \\\n", + "0 November 9, 2020 December 22, 2020 April 30, 2021 \n", + "1 November 2, 2020 December 15, 2020 January 29, 2021 \n", + "2 May 7, 2020 June 15, 2021 June 15, 2021 \n", + "3 May 25, 2020 July 31, 2020 August 31, 2020 \n", + "4 May 5, 2020 May 2021 May 2021 \n", + "\n", + " First Posted Results First Posted Last Update Posted \\\n", + "0 March 8, 2021 NaN March 8, 2021 \n", + "1 October 20, 2020 NaN October 20, 2020 \n", + "2 May 20, 2020 NaN November 9, 2020 \n", + "3 June 4, 2020 NaN June 4, 2020 \n", + "4 May 20, 2020 NaN June 4, 2020 \n", + "\n", + " Locations Study Documents \\\n", + "0 Groupe Hospitalier Paris Saint-Joseph, Paris, ... NaN \n", + "1 Cimedical, Barranquilla, Atlantico, Colombia NaN \n", + "2 Ospedale Papa Giovanni XXIII, Bergamo, Italy|P... NaN \n", + "3 Hong Kong Sanatorium & Hospital, Hong Kong, Ho... NaN \n", + "4 CHR Orléans, Orléans, France NaN \n", + "\n", + " URL \n", + "0 https://ClinicalTrials.gov/show/NCT04785898 \n", + "1 https://ClinicalTrials.gov/show/NCT04595136 \n", + "2 https://ClinicalTrials.gov/show/NCT04395482 \n", + "3 https://ClinicalTrials.gov/show/NCT04416061 \n", + "4 https://ClinicalTrials.gov/show/NCT04395924 \n", + "\n", + "[5 rows x 27 columns]\n" + ] + } + ], + "source": [ + "print(\"First 5 rows:\", df.head(5))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "60183864", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First 15 rows: Rank NCT Number Title \\\n", + "0 1 NCT04785898 Diagnostic Performance of the ID Now™ COVID-19... \n", + "1 2 NCT04595136 Study to Evaluate the Efficacy of COVID19-0001... \n", + "2 3 NCT04395482 Lung CT Scan Analysis of SARS-CoV2 Induced Lun... \n", + "3 4 NCT04416061 The Role of a Private Hospital in Hong Kong Am... \n", + "4 5 NCT04395924 Maternal-foetal Transmission of SARS-Cov-2 \n", + "5 6 NCT04516954 Convalescent Plasma for COVID-19 Patients \n", + "6 7 NCT04476940 COVID-19 Breastfeeding Guideline for African-A... \n", + "7 8 NCT04634214 The Severity of COVID 19 in Diabetes and Non-d... \n", + "8 9 NCT04602884 Early Detection of COVID-19 Using Breath Analysis \n", + "9 10 NCT04384588 COVID19-Convalescent Plasma for Treating Patie... \n", + "10 11 NCT04355897 CoVID-19 Plasma in Treatment of COVID-19 Patients \n", + "11 12 NCT04412265 Frailty in Elderly Patients With COVID-19 \n", + "12 13 NCT04659759 COVID-19 Pregnancy Related Immunological, Clin... \n", + "13 14 NCT04427332 Smell and Taste Disorders in COVID-19 Patients \n", + "14 15 NCT04842708 Evaluation of Anti-COVID 19 Pfizer Vaccination... \n", + "\n", + " Acronym Status Study Results \\\n", + "0 COVID-IDNow Active, not recruiting No Results Available \n", + "1 COVID-19 Not yet recruiting No Results Available \n", + "2 TAC-COVID19 Recruiting No Results Available \n", + "3 COVID-19 Active, not recruiting No Results Available \n", + "4 TMF-COVID-19 Recruiting No Results Available \n", + "5 CPCP Enrolling by invitation No Results Available \n", + "6 COVID-BF Not yet recruiting No Results Available \n", + "7 COVID19 Not yet recruiting No Results Available \n", + "8 COVID-19 Suspended No Results Available \n", + "9 FALP-COVID Recruiting No Results Available \n", + "10 NaN Recruiting No Results Available \n", + "11 FRA-COVID Recruiting No Results Available \n", + "12 COVID-PRICE Recruiting No Results Available \n", + "13 COVID-19 ORL Completed No Results Available \n", + "14 COVID-19 Recruiting No Results Available \n", + "\n", + " Conditions \\\n", + "0 Covid19 \n", + "1 SARS-CoV-2 Infection \n", + "2 covid19 \n", + "3 COVID \n", + "4 Maternal Fetal Infection Transmission|COVID-19... \n", + "5 COVID 19 \n", + "6 Covid19|Exclusive Breastfeeding \n", + "7 Covid19|Type2 Diabetes \n", + "8 Covid19 \n", + "9 COVID-19 Infection|Cancer Patients|General Pop... \n", + "10 COVID 19 \n", + "11 Covid19 \n", + "12 Covid19 \n", + "13 covid19 \n", + "14 Covid19 \n", + "\n", + " Interventions \\\n", + "0 Diagnostic Test: ID Now™ COVID-19 Screening Test \n", + "1 Drug: Drug COVID19-0001-USR|Drug: normal saline \n", + "2 Other: Lung CT scan analysis in COVID-19 patients \n", + "3 Diagnostic Test: COVID 19 Diagnostic Test \n", + "4 Diagnostic Test: Diagnosis of SARS-Cov2 by RT-... \n", + "5 Biological: Convalescent COVID 19 Plasma \n", + "6 Behavioral: COVID-19 Breastfeeding Support \n", + "7 NaN \n", + "8 Diagnostic Test: Breath biopsy sampling using ... \n", + "9 Biological: Convalescent Plasma from COVID-19 ... \n", + "10 Biological: Convalescent COVID 19 Plasma \n", + "11 Other: Relation between frailty and clinical o... \n", + "12 Other: COVID-19 exposure|Biological: COVID-19 ... \n", + "13 Other: Investigation of smell and taste disorders \n", + "14 Diagnostic Test: vaccination against COVID19 \n", + "\n", + " Outcome Measures \\\n", + "0 Evaluate the diagnostic performance of the ID ... \n", + "1 Change on viral load results from baseline aft... \n", + "2 A qualitative analysis of parenchymal lung dam... \n", + "3 Proportion of asymptomatic subjects|Proportion... \n", + "4 COVID-19 by positive PCR in cord blood and / o... \n", + "5 Evaluate the safety|Change in requirement for ... \n", + "6 COVID-19 breastfeeding guidance adherence at b... \n", + "7 Severity of COVID 19 among people with and wit... \n", + "8 Correlation between Volatile Organic Compounds... \n", + "9 in-hospital mortality secondary to COVID-19 am... \n", + "10 Reduce mortality|Reduce requirement for mechan... \n", + "11 Development of a tool to measure frailty|A \"pr... \n", + "12 Maternal COVID-19 serology (IgG and IgM)|Mater... \n", + "13 Identification of demographic and clinical fac... \n", + "14 Association between breath VOCs and IgG in blo... \n", + "\n", + " Sponsor/Collaborators ... Other IDs \\\n", + "0 Groupe Hospitalier Paris Saint Joseph ... COVID-IDNow \n", + "1 United Medical Specialties ... COVID19-0001-USR \n", + "2 University of Milano Bicocca ... TAC-COVID19 \n", + "3 Hong Kong Sanatorium & Hospital ... RC-2020-08 \n", + "4 Centre Hospitalier Régional d'Orléans|Centre d... ... CHRO-2020-10 \n", + "5 Vinmec Research Institute of Stem Cell and Gen... ... ISC.20.11.1 \n", + "6 Meharry Medical College ... 330875 \n", + "7 India Diabetes Research Foundation & Dr. A. Ra... ... IDRFARH015 \n", + "8 Scentech Medical Technologies Ltd ... Cov-2-IDF \n", + "9 Fundacion Arturo Lopez Perez|Confederación de ... ... FALP 001-2020 \n", + "10 The Christ Hospital ... 20-23 \n", + "11 University of Milano Bicocca ... FRA-COVID \n", + "12 Thomas Jefferson University|Nemours ... 20F.1043 \n", + "13 University of Milano Bicocca ... COVID-19 ORL \n", + "14 Scentech Medical Technologies Ltd ... Cov-2-SMC-V-2020 \n", + "\n", + " Start Date Primary Completion Date Completion Date \\\n", + "0 November 9, 2020 December 22, 2020 April 30, 2021 \n", + "1 November 2, 2020 December 15, 2020 January 29, 2021 \n", + "2 May 7, 2020 June 15, 2021 June 15, 2021 \n", + "3 May 25, 2020 July 31, 2020 August 31, 2020 \n", + "4 May 5, 2020 May 2021 May 2021 \n", + "5 August 1, 2020 November 30, 2020 December 30, 2020 \n", + "6 September 2020 October 2021 June 2022 \n", + "7 November 16, 2020 February 16, 2021 May 16, 2021 \n", + "8 September 22, 2020 December 30, 2021 December 30, 2021 \n", + "9 April 7, 2020 April 6, 2021 April 6, 2021 \n", + "10 April 28, 2020 July 2020 August 2020 \n", + "11 April 16, 2020 March 1, 2021 March 1, 2021 \n", + "12 November 17, 2020 December 31, 2021 June 30, 2022 \n", + "13 June 11, 2020 October 16, 2020 October 16, 2020 \n", + "14 December 24, 2020 October 24, 2021 October 24, 2021 \n", + "\n", + " First Posted Results First Posted Last Update Posted \\\n", + "0 March 8, 2021 NaN March 8, 2021 \n", + "1 October 20, 2020 NaN October 20, 2020 \n", + "2 May 20, 2020 NaN November 9, 2020 \n", + "3 June 4, 2020 NaN June 4, 2020 \n", + "4 May 20, 2020 NaN June 4, 2020 \n", + "5 August 18, 2020 NaN August 20, 2020 \n", + "6 July 20, 2020 NaN July 20, 2020 \n", + "7 November 18, 2020 NaN November 18, 2020 \n", + "8 October 26, 2020 NaN April 13, 2021 \n", + "9 May 12, 2020 NaN May 12, 2020 \n", + "10 April 21, 2020 NaN May 20, 2020 \n", + "11 June 2, 2020 NaN January 22, 2021 \n", + "12 December 9, 2020 NaN March 5, 2021 \n", + "13 June 11, 2020 NaN January 27, 2021 \n", + "14 April 13, 2021 NaN April 13, 2021 \n", + "\n", + " Locations Study Documents \\\n", + "0 Groupe Hospitalier Paris Saint-Joseph, Paris, ... NaN \n", + "1 Cimedical, Barranquilla, Atlantico, Colombia NaN \n", + "2 Ospedale Papa Giovanni XXIII, Bergamo, Italy|P... NaN \n", + "3 Hong Kong Sanatorium & Hospital, Hong Kong, Ho... NaN \n", + "4 CHR Orléans, Orléans, France NaN \n", + "5 Vinmec Research Institute of Stem cell and Gen... NaN \n", + "6 Meharry Medical College, Nashville, Tennessee,... NaN \n", + "7 Orthomed Hospital, Chennai, Tamil Nadu, India|... NaN \n", + "8 IDF COVID 19 Isolation Facility, Ashkelon, Israel NaN \n", + "9 Fundacion Arturo Lopez Perez, Providencia, San... NaN \n", + "10 The Christ Hospital, Cincinnati, Ohio, United ... NaN \n", + "11 Dipartimento di Geriatria, Fondazione Poliambu... NaN \n", + "12 Thomas Jefferson University Hospital, Philadel... NaN \n", + "13 ASST Monza-Ospedale San Gerardo, Monza, Italy NaN \n", + "14 Shamir Medical Center, Be'er Ya'aqov, Israel NaN \n", + "\n", + " URL \n", + "0 https://ClinicalTrials.gov/show/NCT04785898 \n", + "1 https://ClinicalTrials.gov/show/NCT04595136 \n", + "2 https://ClinicalTrials.gov/show/NCT04395482 \n", + "3 https://ClinicalTrials.gov/show/NCT04416061 \n", + "4 https://ClinicalTrials.gov/show/NCT04395924 \n", + "5 https://ClinicalTrials.gov/show/NCT04516954 \n", + "6 https://ClinicalTrials.gov/show/NCT04476940 \n", + "7 https://ClinicalTrials.gov/show/NCT04634214 \n", + "8 https://ClinicalTrials.gov/show/NCT04602884 \n", + "9 https://ClinicalTrials.gov/show/NCT04384588 \n", + "10 https://ClinicalTrials.gov/show/NCT04355897 \n", + "11 https://ClinicalTrials.gov/show/NCT04412265 \n", + "12 https://ClinicalTrials.gov/show/NCT04659759 \n", + "13 https://ClinicalTrials.gov/show/NCT04427332 \n", + "14 https://ClinicalTrials.gov/show/NCT04842708 \n", + "\n", + "[15 rows x 27 columns]\n" + ] + } + ], + "source": [ + "print(\"First 15 rows:\", df.head(15))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a207a23d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First 30 rows: Rank NCT Number Title \\\n", + "0 1 NCT04785898 Diagnostic Performance of the ID Now™ COVID-19... \n", + "1 2 NCT04595136 Study to Evaluate the Efficacy of COVID19-0001... \n", + "2 3 NCT04395482 Lung CT Scan Analysis of SARS-CoV2 Induced Lun... \n", + "3 4 NCT04416061 The Role of a Private Hospital in Hong Kong Am... \n", + "4 5 NCT04395924 Maternal-foetal Transmission of SARS-Cov-2 \n", + "5 6 NCT04516954 Convalescent Plasma for COVID-19 Patients \n", + "6 7 NCT04476940 COVID-19 Breastfeeding Guideline for African-A... \n", + "7 8 NCT04634214 The Severity of COVID 19 in Diabetes and Non-d... \n", + "8 9 NCT04602884 Early Detection of COVID-19 Using Breath Analysis \n", + "9 10 NCT04384588 COVID19-Convalescent Plasma for Treating Patie... \n", + "10 11 NCT04355897 CoVID-19 Plasma in Treatment of COVID-19 Patients \n", + "11 12 NCT04412265 Frailty in Elderly Patients With COVID-19 \n", + "12 13 NCT04659759 COVID-19 Pregnancy Related Immunological, Clin... \n", + "13 14 NCT04427332 Smell and Taste Disorders in COVID-19 Patients \n", + "14 15 NCT04842708 Evaluation of Anti-COVID 19 Pfizer Vaccination... \n", + "15 16 NCT04367805 COVID-19 Infection in Patients With Hepatocell... \n", + "16 17 NCT04609969 Diagnostic Performance Evaluation of a Novel S... \n", + "17 18 NCT04817553 Impact of COVID-19 on the Clinical Outcomes an... \n", + "18 19 NCT04632355 Musculoskeletal Pain in Patients With Covid-19... \n", + "19 20 NCT04407585 Testing the Accuracy of a Digital Test to Diag... \n", + "20 21 NCT04657510 Femoral frACturEs and COVID-19. \n", + "21 22 NCT04473170 Study Evaluating the Safety and Efficacy of Au... \n", + "22 23 NCT04497298 Clinical Trial to Evaluate the Safety and Immu... \n", + "23 24 NCT04549831 Genetic Bases of COVID-19 Clinical Variability \n", + "24 25 NCT04706390 Comparison of the Immune Response to Natural C... \n", + "25 26 NCT04632719 The MentalPlus® for Assessment and Rehabilitat... \n", + "26 27 NCT04427345 Predictive Factors COVID-19 Patients \n", + "27 28 NCT04424004 MURDOCK Cabarrus County COVID-19 Prevalence an... \n", + "28 29 NCT04407494 Diagnostic Value of Patient - Reported and Cli... \n", + "29 30 NCT04360811 Assessment of Obstetric, Fetal and Neonatal Ri... \n", + "\n", + " Acronym Status Study Results \\\n", + "0 COVID-IDNow Active, not recruiting No Results Available \n", + "1 COVID-19 Not yet recruiting No Results Available \n", + "2 TAC-COVID19 Recruiting No Results Available \n", + "3 COVID-19 Active, not recruiting No Results Available \n", + "4 TMF-COVID-19 Recruiting No Results Available \n", + "5 CPCP Enrolling by invitation No Results Available \n", + "6 COVID-BF Not yet recruiting No Results Available \n", + "7 COVID19 Not yet recruiting No Results Available \n", + "8 COVID-19 Suspended No Results Available \n", + "9 FALP-COVID Recruiting No Results Available \n", + "10 NaN Recruiting No Results Available \n", + "11 FRA-COVID Recruiting No Results Available \n", + "12 COVID-PRICE Recruiting No Results Available \n", + "13 COVID-19 ORL Completed No Results Available \n", + "14 COVID-19 Recruiting No Results Available \n", + "15 COVID19-CHIEF Recruiting No Results Available \n", + "16 COVID-VIRO Completed No Results Available \n", + "17 IgG4-COVID Recruiting No Results Available \n", + "18 Covid19-Pain Not yet recruiting No Results Available \n", + "19 NaN Recruiting No Results Available \n", + "20 FACE COVID-19 Recruiting No Results Available \n", + "21 SENTAD-COVID Completed No Results Available \n", + "22 COVID-19-101 Active, not recruiting No Results Available \n", + "23 GEN-COVID Recruiting No Results Available \n", + "24 COVID19vac-1 Recruiting No Results Available \n", + "25 MP-COVID Recruiting No Results Available \n", + "26 MI-COVID Recruiting No Results Available \n", + "27 C3PI Active, not recruiting No Results Available \n", + "28 COVID-OLFA Completed No Results Available \n", + "29 COroFet Recruiting No Results Available \n", + "\n", + " Conditions \\\n", + "0 Covid19 \n", + "1 SARS-CoV-2 Infection \n", + "2 covid19 \n", + "3 COVID \n", + "4 Maternal Fetal Infection Transmission|COVID-19... \n", + "5 COVID 19 \n", + "6 Covid19|Exclusive Breastfeeding \n", + "7 Covid19|Type2 Diabetes \n", + "8 Covid19 \n", + "9 COVID-19 Infection|Cancer Patients|General Pop... \n", + "10 COVID 19 \n", + "11 Covid19 \n", + "12 Covid19 \n", + "13 covid19 \n", + "14 Covid19 \n", + "15 Hepatocellular Carcinoma|COVID-19 \n", + "16 Covid19|SARS-CoV-2 Infection \n", + "17 IgG4 Related Disease|Covid19 \n", + "18 Covid19 \n", + "19 Covid-19 \n", + "20 Covid19|Femur Fracture|Fragility Fracture \n", + "21 Coronavirus Disease 2019 (COVID-19) \n", + "22 COVID-19 \n", + "23 COVID-19 \n", + "24 Covid19 \n", + "25 Cognitive Impairment|Covid19|Cognitive Dysfunc... \n", + "26 covid19 \n", + "27 COVID 19 \n", + "28 COVID-19 \n", + "29 Pregnancy \n", + "\n", + " Interventions \\\n", + "0 Diagnostic Test: ID Now™ COVID-19 Screening Test \n", + "1 Drug: Drug COVID19-0001-USR|Drug: normal saline \n", + "2 Other: Lung CT scan analysis in COVID-19 patients \n", + "3 Diagnostic Test: COVID 19 Diagnostic Test \n", + "4 Diagnostic Test: Diagnosis of SARS-Cov2 by RT-... \n", + "5 Biological: Convalescent COVID 19 Plasma \n", + "6 Behavioral: COVID-19 Breastfeeding Support \n", + "7 NaN \n", + "8 Diagnostic Test: Breath biopsy sampling using ... \n", + "9 Biological: Convalescent Plasma from COVID-19 ... \n", + "10 Biological: Convalescent COVID 19 Plasma \n", + "11 Other: Relation between frailty and clinical o... \n", + "12 Other: COVID-19 exposure|Biological: COVID-19 ... \n", + "13 Other: Investigation of smell and taste disorders \n", + "14 Diagnostic Test: vaccination against COVID19 \n", + "15 Diagnostic Test: nasopharyngeal Covid 19 RT-PCR \n", + "16 Diagnostic Test: RT-qPCR test|Diagnostic Test:... \n", + "17 Other: exposure to COVID19 \n", + "18 Other: Interview \n", + "19 Diagnostic Test: Covid-19 swab PCR test \n", + "20 Other: COVID-19 \n", + "21 Biological: Autologous Non-Hematopoietic Perip... \n", + "22 Biological: Two COVID-19 vaccine candidate (TM... \n", + "23 Genetic: Massive parallel sequencing of host g... \n", + "24 Biological: covid-19 vaccine \n", + "25 Device: The use of the MentalPlus® digital gam... \n", + "26 Other: Predictive factors for clinical respons... \n", + "27 Other: COVID-19 PCR and serology testing \n", + "28 Biological: Reporting of anosmia, ageusia and ... \n", + "29 Diagnostic Test: COVID 19 diagnostic test by PCR \n", + "\n", + " Outcome Measures \\\n", + "0 Evaluate the diagnostic performance of the ID ... \n", + "1 Change on viral load results from baseline aft... \n", + "2 A qualitative analysis of parenchymal lung dam... \n", + "3 Proportion of asymptomatic subjects|Proportion... \n", + "4 COVID-19 by positive PCR in cord blood and / o... \n", + "5 Evaluate the safety|Change in requirement for ... \n", + "6 COVID-19 breastfeeding guidance adherence at b... \n", + "7 Severity of COVID 19 among people with and wit... \n", + "8 Correlation between Volatile Organic Compounds... \n", + "9 in-hospital mortality secondary to COVID-19 am... \n", + "10 Reduce mortality|Reduce requirement for mechan... \n", + "11 Development of a tool to measure frailty|A \"pr... \n", + "12 Maternal COVID-19 serology (IgG and IgM)|Mater... \n", + "13 Identification of demographic and clinical fac... \n", + "14 Association between breath VOCs and IgG in blo... \n", + "15 Incidence of COVID-19 infection in patients wi... \n", + "16 Evaluation of COVID VIRO® diagnostic specifici... \n", + "17 Incidence of COVID-19 in IgG4 patients with pa... \n", + "18 Self-reported Version of the Leeds Assessment ... \n", + "19 SARS-CoV-2 infection \n", + "20 Survival at discharge day (comparison between ... \n", + "21 Adverse reactions incidence.|Rate of mortality... \n", + "22 To assess the safety and tolerability of the C... \n", + "23 To identify the genetic determinants of COVID-... \n", + "24 immune responses|Duration and breadth of B- an... \n", + "25 Assessment of cognitive functions after COVID-... \n", + "26 Identify risk factors for intra-hospital morta... \n", + "27 Estimate the prevalence of COVID-19 infection ... \n", + "28 Diagnostic values of anosmia and ageusia for C... \n", + "29 number of positive COVID-19 women \n", + "\n", + " Sponsor/Collaborators ... \\\n", + "0 Groupe Hospitalier Paris Saint Joseph ... \n", + "1 United Medical Specialties ... \n", + "2 University of Milano Bicocca ... \n", + "3 Hong Kong Sanatorium & Hospital ... \n", + "4 Centre Hospitalier Régional d'Orléans|Centre d... ... \n", + "5 Vinmec Research Institute of Stem Cell and Gen... ... \n", + "6 Meharry Medical College ... \n", + "7 India Diabetes Research Foundation & Dr. A. Ra... ... \n", + "8 Scentech Medical Technologies Ltd ... \n", + "9 Fundacion Arturo Lopez Perez|Confederación de ... ... \n", + "10 The Christ Hospital ... \n", + "11 University of Milano Bicocca ... \n", + "12 Thomas Jefferson University|Nemours ... \n", + "13 University of Milano Bicocca ... \n", + "14 Scentech Medical Technologies Ltd ... \n", + "15 Centre Hospitalier Universitaire, Amiens|Centr... ... \n", + "16 Centre Hospitalier Régional d'Orléans ... \n", + "17 Chinese University of Hong Kong ... \n", + "18 Fondazione Don Carlo Gnocchi Onlus ... \n", + "19 King's College London|Zoe Global Limited|Depar... ... \n", + "20 Istituto Ortopedico Galeazzi ... \n", + "21 Abu Dhabi Stem Cells Center ... \n", + "22 Institut Pasteur|Themis Bioscience GmbH|Coalit... ... \n", + "23 University of Siena|Policlinico San Matteo Pav... ... \n", + "24 University of Bergen|Haukeland University Hosp... ... \n", + "25 University of Sao Paulo ... \n", + "26 University of Milano Bicocca ... \n", + "27 Duke University|North Carolina Department of H... ... \n", + "28 University Hospital, Montpellier ... \n", + "29 University Hospital, Toulouse ... \n", + "\n", + " Other IDs Start Date Primary Completion Date \\\n", + "0 COVID-IDNow November 9, 2020 December 22, 2020 \n", + "1 COVID19-0001-USR November 2, 2020 December 15, 2020 \n", + "2 TAC-COVID19 May 7, 2020 June 15, 2021 \n", + "3 RC-2020-08 May 25, 2020 July 31, 2020 \n", + "4 CHRO-2020-10 May 5, 2020 May 2021 \n", + "5 ISC.20.11.1 August 1, 2020 November 30, 2020 \n", + "6 330875 September 2020 October 2021 \n", + "7 IDRFARH015 November 16, 2020 February 16, 2021 \n", + "8 Cov-2-IDF September 22, 2020 December 30, 2021 \n", + "9 FALP 001-2020 April 7, 2020 April 6, 2021 \n", + "10 20-23 April 28, 2020 July 2020 \n", + "11 FRA-COVID April 16, 2020 March 1, 2021 \n", + "12 20F.1043 November 17, 2020 December 31, 2021 \n", + "13 COVID-19 ORL June 11, 2020 October 16, 2020 \n", + "14 Cov-2-SMC-V-2020 December 24, 2020 October 24, 2021 \n", + "15 PI2020_843_0042 April 27, 2020 October 2020 \n", + "16 CHRO-2020-18 October 13, 2020 October 17, 2020 \n", + "17 IgG4-COVID March 24, 2021 July 2021 \n", + "18 Covid19-Pain December 1, 2020 July 31, 2021 \n", + "19 Covid-19 Validation Study June 1, 2020 May 10, 2021 \n", + "20 FACE COVID-19 November 14, 2020 December 1, 2020 \n", + "21 CT.001.1.0.SENTAD-COVID April 4, 2020 May 20, 2020 \n", + "22 2020-016|2020-002973-89 August 10, 2020 June 2021 \n", + "23 16917 April 8, 2020 April 8, 2021 \n", + "24 COVID-19vacc-1 January 12, 2021 January 12, 2022 \n", + "25 MentalPlus® November 8, 2020 December 29, 2020 \n", + "26 MI-COVID April 30, 2020 April 30, 2021 \n", + "27 Pro00105703 June 9, 2020 June 30, 2021 \n", + "28 RECHMPL20_0176 March 1, 2020 April 1, 2020 \n", + "29 RC31/20/0123 April 17, 2020 April 2021 \n", + "\n", + " Completion Date First Posted Results First Posted \\\n", + "0 April 30, 2021 March 8, 2021 NaN \n", + "1 January 29, 2021 October 20, 2020 NaN \n", + "2 June 15, 2021 May 20, 2020 NaN \n", + "3 August 31, 2020 June 4, 2020 NaN \n", + "4 May 2021 May 20, 2020 NaN \n", + "5 December 30, 2020 August 18, 2020 NaN \n", + "6 June 2022 July 20, 2020 NaN \n", + "7 May 16, 2021 November 18, 2020 NaN \n", + "8 December 30, 2021 October 26, 2020 NaN \n", + "9 April 6, 2021 May 12, 2020 NaN \n", + "10 August 2020 April 21, 2020 NaN \n", + "11 March 1, 2021 June 2, 2020 NaN \n", + "12 June 30, 2022 December 9, 2020 NaN \n", + "13 October 16, 2020 June 11, 2020 NaN \n", + "14 October 24, 2021 April 13, 2021 NaN \n", + "15 October 2020 April 29, 2020 NaN \n", + "16 October 17, 2020 October 30, 2020 NaN \n", + "17 October 2021 March 26, 2021 NaN \n", + "18 July 31, 2021 November 17, 2020 NaN \n", + "19 May 10, 2021 May 29, 2020 NaN \n", + "20 December 31, 2020 December 8, 2020 NaN \n", + "21 July 14, 2020 July 16, 2020 NaN \n", + "22 July 2021 August 4, 2020 NaN \n", + "23 April 8, 2026 September 16, 2020 NaN \n", + "24 January 12, 2024 January 12, 2021 NaN \n", + "25 December 29, 2023 November 17, 2020 NaN \n", + "26 April 30, 2021 June 11, 2020 NaN \n", + "27 June 30, 2021 June 9, 2020 NaN \n", + "28 April 30, 2020 May 29, 2020 NaN \n", + "29 April 2022 April 24, 2020 NaN \n", + "\n", + " Last Update Posted Locations \\\n", + "0 March 8, 2021 Groupe Hospitalier Paris Saint-Joseph, Paris, ... \n", + "1 October 20, 2020 Cimedical, Barranquilla, Atlantico, Colombia \n", + "2 November 9, 2020 Ospedale Papa Giovanni XXIII, Bergamo, Italy|P... \n", + "3 June 4, 2020 Hong Kong Sanatorium & Hospital, Hong Kong, Ho... \n", + "4 June 4, 2020 CHR Orléans, Orléans, France \n", + "5 August 20, 2020 Vinmec Research Institute of Stem cell and Gen... \n", + "6 July 20, 2020 Meharry Medical College, Nashville, Tennessee,... \n", + "7 November 18, 2020 Orthomed Hospital, Chennai, Tamil Nadu, India|... \n", + "8 April 13, 2021 IDF COVID 19 Isolation Facility, Ashkelon, Israel \n", + "9 May 12, 2020 Fundacion Arturo Lopez Perez, Providencia, San... \n", + "10 May 20, 2020 The Christ Hospital, Cincinnati, Ohio, United ... \n", + "11 January 22, 2021 Dipartimento di Geriatria, Fondazione Poliambu... \n", + "12 March 5, 2021 Thomas Jefferson University Hospital, Philadel... \n", + "13 January 27, 2021 ASST Monza-Ospedale San Gerardo, Monza, Italy \n", + "14 April 13, 2021 Shamir Medical Center, Be'er Ya'aqov, Israel \n", + "15 April 29, 2020 CHU Amiens, Amiens, France \n", + "16 December 7, 2020 Centre Hospitalier Régional d'Orléans, France,... \n", + "17 March 26, 2021 Prince of Wales Hospital, The Chinese Universi... \n", + "18 November 19, 2020 Jorge Hugo Villafañe, Milan, Italy \n", + "19 June 24, 2020 King's College London, London, United Kingdom \n", + "20 December 8, 2020 IRCCS Istituto Ortopedico Galeazzi, Milano, Italy \n", + "21 July 16, 2020 Abu Dhabi Stem Cells Center, Abu Dhabi, United... \n", + "22 April 13, 2021 SGS Life Sciences, Clinical Pharmacology Unit,... \n", + "23 September 17, 2020 University of Siena, Siena, Italy \n", + "24 January 12, 2021 University of Bergen, Bergen, Norway \n", + "25 November 17, 2020 Livia Stocco Sanches Valentin, São Paulo, SP, ... \n", + "26 November 9, 2020 ASST Monza-Ospedale San Gerardo, Monza, Italy \n", + "27 November 17, 2020 Duke CTSI Translational Population Health Offi... \n", + "28 June 1, 2020 Uhmontpellier, Montpellier, France \n", + "29 April 24, 2020 University Hospital of Toulouse, Toulouse, France \n", + "\n", + " Study Documents \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "5 NaN \n", + "6 NaN \n", + "7 NaN \n", + "8 NaN \n", + "9 NaN \n", + "10 NaN \n", + "11 NaN \n", + "12 NaN \n", + "13 NaN \n", + "14 NaN \n", + "15 NaN \n", + "16 NaN \n", + "17 NaN \n", + "18 NaN \n", + "19 \"Statistical Analysis Plan\", https://ClinicalT... \n", + "20 NaN \n", + "21 NaN \n", + "22 NaN \n", + "23 NaN \n", + "24 NaN \n", + "25 NaN \n", + "26 NaN \n", + "27 NaN \n", + "28 NaN \n", + "29 NaN \n", + "\n", + " URL \n", + "0 https://ClinicalTrials.gov/show/NCT04785898 \n", + "1 https://ClinicalTrials.gov/show/NCT04595136 \n", + "2 https://ClinicalTrials.gov/show/NCT04395482 \n", + "3 https://ClinicalTrials.gov/show/NCT04416061 \n", + "4 https://ClinicalTrials.gov/show/NCT04395924 \n", + "5 https://ClinicalTrials.gov/show/NCT04516954 \n", + "6 https://ClinicalTrials.gov/show/NCT04476940 \n", + "7 https://ClinicalTrials.gov/show/NCT04634214 \n", + "8 https://ClinicalTrials.gov/show/NCT04602884 \n", + "9 https://ClinicalTrials.gov/show/NCT04384588 \n", + "10 https://ClinicalTrials.gov/show/NCT04355897 \n", + "11 https://ClinicalTrials.gov/show/NCT04412265 \n", + "12 https://ClinicalTrials.gov/show/NCT04659759 \n", + "13 https://ClinicalTrials.gov/show/NCT04427332 \n", + "14 https://ClinicalTrials.gov/show/NCT04842708 \n", + "15 https://ClinicalTrials.gov/show/NCT04367805 \n", + "16 https://ClinicalTrials.gov/show/NCT04609969 \n", + "17 https://ClinicalTrials.gov/show/NCT04817553 \n", + "18 https://ClinicalTrials.gov/show/NCT04632355 \n", + "19 https://ClinicalTrials.gov/show/NCT04407585 \n", + "20 https://ClinicalTrials.gov/show/NCT04657510 \n", + "21 https://ClinicalTrials.gov/show/NCT04473170 \n", + "22 https://ClinicalTrials.gov/show/NCT04497298 \n", + "23 https://ClinicalTrials.gov/show/NCT04549831 \n", + "24 https://ClinicalTrials.gov/show/NCT04706390 \n", + "25 https://ClinicalTrials.gov/show/NCT04632719 \n", + "26 https://ClinicalTrials.gov/show/NCT04427345 \n", + "27 https://ClinicalTrials.gov/show/NCT04424004 \n", + "28 https://ClinicalTrials.gov/show/NCT04407494 \n", + "29 https://ClinicalTrials.gov/show/NCT04360811 \n", + "\n", + "[30 rows x 27 columns]\n" + ] + } + ], + "source": [ + "print(\"First 30 rows:\", df.head(30))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "772fdab3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First 5 rows: Rank NCT Number Title \\\n", + "5778 5779 NCT04011644 Mobile Health for Alcohol Use Disorders in Cli... \n", + "5779 5780 NCT04681339 Antibiotic Prescription in Children Hospitaliz... \n", + "5780 5781 NCT04740229 Moderate-intensity Flow-based Yoga Effects on ... \n", + "5781 5782 NCT04804917 3-year Follow-up of the Mind My Mind RCT \n", + "5782 5783 NCT04680000 Chronic Pain Management In Primary Care Using ... \n", + "\n", + " Acronym Status Study Results \\\n", + "5778 NaN Recruiting No Results Available \n", + "5779 NaN Not yet recruiting No Results Available \n", + "5780 NaN Recruiting No Results Available \n", + "5781 MindMyMindFU Recruiting No Results Available \n", + "5782 NaN Not yet recruiting No Results Available \n", + "\n", + " Conditions \\\n", + "5778 Alcohol Drinking|Telemedicine \n", + "5779 Community Acquired Pneumonia in Children|Antib... \n", + "5780 Stress|Psychological \n", + "5781 Emotional Problem|Anxiety Disorder of Childhoo... \n", + "5782 Chronic Pain \n", + "\n", + " Interventions \\\n", + "5778 Behavioral: A-CHESS self-monitored|Behavioral:... \n", + "5779 Other: Antibiotic treatment|Other: No antibiot... \n", + "5780 Behavioral: Yoga \n", + "5781 NaN \n", + "5782 Behavioral: Brief Cognitive Behavioral Therapy... \n", + "\n", + " Outcome Measures \\\n", + "5778 Number of risky drinking days|Number of patien... \n", + "5779 Antibiotic treatment rates in hospitalized chi... \n", + "5780 Perceived Stress|Task switching paradigm|Digit... \n", + "5781 The child's impact of mental health problems r... \n", + "5782 Defense and Veterans Pain Rating Scale (DVPRS)... \n", + "\n", + " Sponsor/Collaborators ... \\\n", + "5778 University of Wisconsin, Madison|National Inst... ... \n", + "5779 ARCIM Institute Academic Research in Complemen... ... \n", + "5780 University of Illinois at Urbana-Champaign ... \n", + "5781 Mental Health Services in the Capital Region, ... ... \n", + "5782 The University of Texas Health Science Center ... ... \n", + "\n", + " Other IDs Start Date \\\n", + "5778 2019-0337|R01AA024150|A532007|SMPH/FAMILY MED/... March 23, 2020 \n", + "5779 PKA-03 April 2021 \n", + "5780 21584 February 10, 2021 \n", + "5781 MHSCRDenmark, F-61502-03-1 March 22, 2021 \n", + "5782 HSC20200520H February 2021 \n", + "\n", + " Primary Completion Date Completion Date First Posted \\\n", + "5778 August 2022 April 2023 July 8, 2019 \n", + "5779 November 2024 December 2024 December 23, 2020 \n", + "5780 July 2021 July 2021 February 5, 2021 \n", + "5781 May 31, 2022 December 31, 2022 March 18, 2021 \n", + "5782 February 2024 February 2025 December 22, 2020 \n", + "\n", + " Results First Posted Last Update Posted \\\n", + "5778 NaN April 2, 2021 \n", + "5779 NaN February 10, 2021 \n", + "5780 NaN February 24, 2021 \n", + "5781 NaN April 1, 2021 \n", + "5782 NaN December 22, 2020 \n", + "\n", + " Locations Study Documents \\\n", + "5778 UW Health at the American Center, Madison, Wis... NaN \n", + "5779 Die Filderklinik, Filderstadt, Baden-Württembe... NaN \n", + "5780 University of Illinois at Urbana-Champaign, Ur... NaN \n", + "5781 Mental Health Services in the Capital Region, ... NaN \n", + "5782 Uniformed Services University for the Health S... NaN \n", + "\n", + " URL \n", + "5778 https://ClinicalTrials.gov/show/NCT04011644 \n", + "5779 https://ClinicalTrials.gov/show/NCT04681339 \n", + "5780 https://ClinicalTrials.gov/show/NCT04740229 \n", + "5781 https://ClinicalTrials.gov/show/NCT04804917 \n", + "5782 https://ClinicalTrials.gov/show/NCT04680000 \n", + "\n", + "[5 rows x 27 columns]\n" + ] + } + ], + "source": [ + "print(\"First 5 rows:\", df.tail(5))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "0efa034f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First 15 rows: Rank NCT Number Title \\\n", + "5768 5769 NCT04734795 The Prevalence of Dysfunctional Breathing in C... \n", + "5769 5770 NCT04190368 Team Clinic: Virtual Expansion of an Innovativ... \n", + "5770 5771 NCT03392883 Scaling Up Science-based Mental Health Interve... \n", + "5771 5772 NCT04301518 Prematurity Risk Assessment Combined With Clin... \n", + "5772 5773 NCT04607902 Harnessing Network Science to Personalize Scal... \n", + "5773 5774 NCT04639661 Predictors of Periodontal Outcomes Post-sanati... \n", + "5774 5775 NCT04180709 CBT to Reduce Insomnia and Improve Social Reco... \n", + "5775 5776 NCT04335643 Telehealth CBT for Adolescents and Young Adult... \n", + "5776 5777 NCT04589377 Mindfulness to Mitigate Psychological Threat a... \n", + "5777 5778 NCT04574466 Scaling-up Psychological Interventions With Sy... \n", + "5778 5779 NCT04011644 Mobile Health for Alcohol Use Disorders in Cli... \n", + "5779 5780 NCT04681339 Antibiotic Prescription in Children Hospitaliz... \n", + "5780 5781 NCT04740229 Moderate-intensity Flow-based Yoga Effects on ... \n", + "5781 5782 NCT04804917 3-year Follow-up of the Mind My Mind RCT \n", + "5782 5783 NCT04680000 Chronic Pain Management In Primary Care Using ... \n", + "\n", + " Acronym Status Study Results \\\n", + "5768 NaN Recruiting No Results Available \n", + "5769 NaN Not yet recruiting No Results Available \n", + "5770 DIADA Active, not recruiting No Results Available \n", + "5771 PRIME Recruiting No Results Available \n", + "5772 NaN Recruiting No Results Available \n", + "5773 NaN Enrolling by invitation No Results Available \n", + "5774 CRISP Recruiting No Results Available \n", + "5775 cSLE Recruiting No Results Available \n", + "5776 NaN Recruiting No Results Available \n", + "5777 NaN Recruiting No Results Available \n", + "5778 NaN Recruiting No Results Available \n", + "5779 NaN Not yet recruiting No Results Available \n", + "5780 NaN Recruiting No Results Available \n", + "5781 MindMyMindFU Recruiting No Results Available \n", + "5782 NaN Not yet recruiting No Results Available \n", + "\n", + " Conditions \\\n", + "5768 Dysfunctional Breathing|Asthma in Children|Asthma \n", + "5769 Type 1 Diabetes \n", + "5770 Depression|Problematic Alcohol Use \n", + "5771 Preterm Labor|Preterm Birth \n", + "5772 Depression \n", + "5773 Periodontal Diseases|Periodontal Pocket \n", + "5774 Psychotic Disorders|Psychosis|Sleep \n", + "5775 Systemic Lupus Erythematosus \n", + "5776 Stress \n", + "5777 Distress|PTSD|Anxiety|Depression|Trauma|Functi... \n", + "5778 Alcohol Drinking|Telemedicine \n", + "5779 Community Acquired Pneumonia in Children|Antib... \n", + "5780 Stress|Psychological \n", + "5781 Emotional Problem|Anxiety Disorder of Childhoo... \n", + "5782 Chronic Pain \n", + "\n", + " Interventions \\\n", + "5768 NaN \n", + "5769 Other: Team Clinic Care|Other: Standard Care \n", + "5770 Behavioral: Laddr \n", + "5771 Other: Multimodal intervention strategy \n", + "5772 Behavioral: Supportive Therapy SSI|Behavioral:... \n", + "5773 NaN \n", + "5774 Device: Sleepio \n", + "5775 Behavioral: TEACH \n", + "5776 Behavioral: Mindfulness Training \n", + "5777 Behavioral: Problem Management Plus \n", + "5778 Behavioral: A-CHESS self-monitored|Behavioral:... \n", + "5779 Other: Antibiotic treatment|Other: No antibiot... \n", + "5780 Behavioral: Yoga \n", + "5781 NaN \n", + "5782 Behavioral: Brief Cognitive Behavioral Therapy... \n", + "\n", + " Outcome Measures \\\n", + "5768 Prevalence of children with dysfunctional brea... \n", + "5769 Hemoglobin A1C at Baseline|Hemoglobin A1C Prog... \n", + "5770 The Integrated Measure of Implementation Conte... \n", + "5771 Neonatal morbidity/mortality|Length of neonata... \n", + "5772 Change in adolescent depressive symptom severi... \n", + "5773 Probing depth|Bleeding on probing|Tooth Loss|O... \n", + "5774 Change from baseline Work and Social Adjustmen... \n", + "5775 Recruitment rates of the study|Retention rates... \n", + "5776 Mean differences in Psychological Threat measu... \n", + "5777 Change in psychological distress|Change in pos... \n", + "5778 Number of risky drinking days|Number of patien... \n", + "5779 Antibiotic treatment rates in hospitalized chi... \n", + "5780 Perceived Stress|Task switching paradigm|Digit... \n", + "5781 The child's impact of mental health problems r... \n", + "5782 Defense and Veterans Pain Rating Scale (DVPRS)... \n", + "\n", + " Sponsor/Collaborators ... \\\n", + "5768 Kolding Sygehus|Odense Patient Data Explorativ... ... \n", + "5769 Children's Hospital Los Angeles|University of ... ... \n", + "5770 Dartmouth-Hitchcock Medical Center|Pontificia ... ... \n", + "5771 Sera Prognostics, Inc.|High Risk Pregnancy Cen... ... \n", + "5772 Stony Brook University ... \n", + "5773 Brock University|Dr. Peter C. Fritz, Periodont... ... \n", + "5774 University of Cambridge|Cambridgeshire and Pet... ... \n", + "5775 Michigan State University|Arthritis Foundation... ... \n", + "5776 University of Pittsburgh|U.S. National Science... ... \n", + "5777 University of Zurich ... \n", + "5778 University of Wisconsin, Madison|National Inst... ... \n", + "5779 ARCIM Institute Academic Research in Complemen... ... \n", + "5780 University of Illinois at Urbana-Champaign ... \n", + "5781 Mental Health Services in the Capital Region, ... ... \n", + "5782 The University of Texas Health Science Center ... ... \n", + "\n", + " Other IDs Start Date \\\n", + "5768 S-2020-0101-a February 5, 2021 \n", + "5769 CHLA-19-00062 March 1, 2021 \n", + "5770 1U19MH109988 D18019|1U19MH109988 February 13, 2018 \n", + "5771 SP019 November 6, 2020 \n", + "5772 IRB2019-00382 February 15, 2021 \n", + "5773 20-070 November 25, 2020 \n", + "5774 M00915|RNAG-521|224101|19/EE/0352 October 30, 2020 \n", + "5775 STUDY00003882 August 4, 2020 \n", + "5776 STUDY19050258 October 26, 2020 \n", + "5777 BASEC-2017-01175-rct August 25, 2020 \n", + "5778 2019-0337|R01AA024150|A532007|SMPH/FAMILY MED/... March 23, 2020 \n", + "5779 PKA-03 April 2021 \n", + "5780 21584 February 10, 2021 \n", + "5781 MHSCRDenmark, F-61502-03-1 March 22, 2021 \n", + "5782 HSC20200520H February 2021 \n", + "\n", + " Primary Completion Date Completion Date First Posted \\\n", + "5768 December 2022 December 2022 February 2, 2021 \n", + "5769 March 30, 2022 December 30, 2022 December 9, 2019 \n", + "5770 April 2021 April 2021 January 8, 2018 \n", + "5771 September 30, 2022 June 30, 2025 March 10, 2020 \n", + "5772 July 2024 August 2024 October 29, 2020 \n", + "5773 August 2021 December 2021 November 20, 2020 \n", + "5774 November 30, 2022 November 30, 2022 November 27, 2019 \n", + "5775 December 2021 December 2021 April 6, 2020 \n", + "5776 August 31, 2022 December 31, 2022 October 19, 2020 \n", + "5777 June 2022 June 2022 October 5, 2020 \n", + "5778 August 2022 April 2023 July 8, 2019 \n", + "5779 November 2024 December 2024 December 23, 2020 \n", + "5780 July 2021 July 2021 February 5, 2021 \n", + "5781 May 31, 2022 December 31, 2022 March 18, 2021 \n", + "5782 February 2024 February 2025 December 22, 2020 \n", + "\n", + " Results First Posted Last Update Posted \\\n", + "5768 NaN February 9, 2021 \n", + "5769 NaN March 1, 2021 \n", + "5770 NaN December 19, 2020 \n", + "5771 NaN April 14, 2021 \n", + "5772 NaN March 17, 2021 \n", + "5773 NaN December 4, 2020 \n", + "5774 NaN November 3, 2020 \n", + "5775 NaN March 9, 2021 \n", + "5776 NaN November 10, 2020 \n", + "5777 NaN October 5, 2020 \n", + "5778 NaN April 2, 2021 \n", + "5779 NaN February 10, 2021 \n", + "5780 NaN February 24, 2021 \n", + "5781 NaN April 1, 2021 \n", + "5782 NaN December 22, 2020 \n", + "\n", + " Locations \\\n", + "5768 Kolding Sygehus, Kolding, Denmark \n", + "5769 Children's Hospital Los Angeles, Los Angeles, ... \n", + "5770 Salud de Tundama, Duitama, Boyaca, Colombia|Em... \n", + "5771 High Risk Pregnancy Center, Las Vegas, Nevada,... \n", + "5772 Stony Brook University, Stony Brook, New York,... \n", + "5773 Dr. Peter C. Fritz, Periodontal Wellness & Imp... \n", + "5774 Cameo Early Intervention, Cambridge, Cambridge... \n", + "5775 Michigan State University, Grand Rapids, Michi... \n", + "5776 University of Pittsburgh, Pittsburgh, Pennsylv... \n", + "5777 Klinik für Konsiliarpsychiatrie und Psychosoma... \n", + "5778 UW Health at the American Center, Madison, Wis... \n", + "5779 Die Filderklinik, Filderstadt, Baden-Württembe... \n", + "5780 University of Illinois at Urbana-Champaign, Ur... \n", + "5781 Mental Health Services in the Capital Region, ... \n", + "5782 Uniformed Services University for the Health S... \n", + "\n", + " Study Documents \\\n", + "5768 NaN \n", + "5769 NaN \n", + "5770 \"Informed Consent Form: Patient Informed Conse... \n", + "5771 NaN \n", + "5772 NaN \n", + "5773 NaN \n", + "5774 NaN \n", + "5775 NaN \n", + "5776 NaN \n", + "5777 NaN \n", + "5778 NaN \n", + "5779 NaN \n", + "5780 NaN \n", + "5781 NaN \n", + "5782 NaN \n", + "\n", + " URL \n", + "5768 https://ClinicalTrials.gov/show/NCT04734795 \n", + "5769 https://ClinicalTrials.gov/show/NCT04190368 \n", + "5770 https://ClinicalTrials.gov/show/NCT03392883 \n", + "5771 https://ClinicalTrials.gov/show/NCT04301518 \n", + "5772 https://ClinicalTrials.gov/show/NCT04607902 \n", + "5773 https://ClinicalTrials.gov/show/NCT04639661 \n", + "5774 https://ClinicalTrials.gov/show/NCT04180709 \n", + "5775 https://ClinicalTrials.gov/show/NCT04335643 \n", + "5776 https://ClinicalTrials.gov/show/NCT04589377 \n", + "5777 https://ClinicalTrials.gov/show/NCT04574466 \n", + "5778 https://ClinicalTrials.gov/show/NCT04011644 \n", + "5779 https://ClinicalTrials.gov/show/NCT04681339 \n", + "5780 https://ClinicalTrials.gov/show/NCT04740229 \n", + "5781 https://ClinicalTrials.gov/show/NCT04804917 \n", + "5782 https://ClinicalTrials.gov/show/NCT04680000 \n", + "\n", + "[15 rows x 27 columns]\n" + ] + } + ], + "source": [ + "print(\"First 15 rows:\", df.tail(15))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "71994f54", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First 30 rows: Rank NCT Number Title \\\n", + "5753 5754 NCT04646291 Retrospective Study on the Use, Efficiency, an... \n", + "5754 5755 NCT04735406 The MS-LINK™ Outcomes Study \n", + "5755 5756 NCT02495753 Vaginal Cleansing Before Cesarean Delivery to ... \n", + "5756 5757 NCT03465280 Airway Intervention Registry (AIR): Recurrent ... \n", + "5757 5758 NCT04576377 Dynamics of the Immune Responses to Repeat Inf... \n", + "5758 5759 NCT04713735 Impact of Lactoferrin, a Dietary Supplement, v... \n", + "5759 5760 NCT03873467 Group Lifestyle Balance™ for Individuals With ... \n", + "5760 5761 NCT03594734 Group Lifestyle Balance™ for Individuals With ... \n", + "5761 5762 NCT03728257 Lung Transplant G0 (LTGO): Improving Self-Mana... \n", + "5762 5763 NCT04782518 Personalizing Exercise for Parkinson Disease \n", + "5763 5764 NCT04783454 Preventing Neck & Back Pain in Teleworking Off... \n", + "5764 5765 NCT03823469 Evaluating the Impact of a Culinary Coaching T... \n", + "5765 5766 NCT04429061 Reaching 90 90 90 in Adolescents in Zambia: Us... \n", + "5766 5767 NCT04184895 A Study to Assess the Safety, Tolerability and... \n", + "5767 5768 NCT03922334 Navigating New Motherhood 2 \n", + "5768 5769 NCT04734795 The Prevalence of Dysfunctional Breathing in C... \n", + "5769 5770 NCT04190368 Team Clinic: Virtual Expansion of an Innovativ... \n", + "5770 5771 NCT03392883 Scaling Up Science-based Mental Health Interve... \n", + "5771 5772 NCT04301518 Prematurity Risk Assessment Combined With Clin... \n", + "5772 5773 NCT04607902 Harnessing Network Science to Personalize Scal... \n", + "5773 5774 NCT04639661 Predictors of Periodontal Outcomes Post-sanati... \n", + "5774 5775 NCT04180709 CBT to Reduce Insomnia and Improve Social Reco... \n", + "5775 5776 NCT04335643 Telehealth CBT for Adolescents and Young Adult... \n", + "5776 5777 NCT04589377 Mindfulness to Mitigate Psychological Threat a... \n", + "5777 5778 NCT04574466 Scaling-up Psychological Interventions With Sy... \n", + "5778 5779 NCT04011644 Mobile Health for Alcohol Use Disorders in Cli... \n", + "5779 5780 NCT04681339 Antibiotic Prescription in Children Hospitaliz... \n", + "5780 5781 NCT04740229 Moderate-intensity Flow-based Yoga Effects on ... \n", + "5781 5782 NCT04804917 3-year Follow-up of the Mind My Mind RCT \n", + "5782 5783 NCT04680000 Chronic Pain Management In Primary Care Using ... \n", + "\n", + " Acronym Status Study Results \\\n", + "5753 NaN Completed No Results Available \n", + "5754 NaN Not yet recruiting No Results Available \n", + "5755 NaN Active, not recruiting No Results Available \n", + "5756 AIR:RRP Recruiting No Results Available \n", + "5757 DRIVE Recruiting No Results Available \n", + "5758 NaN Recruiting No Results Available \n", + "5759 GLB-CVA Active, not recruiting No Results Available \n", + "5760 NaN Active, not recruiting No Results Available \n", + "5761 LTGO Recruiting No Results Available \n", + "5762 PEP Recruiting No Results Available \n", + "5763 Prevent@HOME Recruiting No Results Available \n", + "5764 NaN Recruiting No Results Available \n", + "5765 SKILLZ Recruiting No Results Available \n", + "5766 NaN Recruiting No Results Available \n", + "5767 NNM2 Recruiting No Results Available \n", + "5768 NaN Recruiting No Results Available \n", + "5769 NaN Not yet recruiting No Results Available \n", + "5770 DIADA Active, not recruiting No Results Available \n", + "5771 PRIME Recruiting No Results Available \n", + "5772 NaN Recruiting No Results Available \n", + "5773 NaN Enrolling by invitation No Results Available \n", + "5774 CRISP Recruiting No Results Available \n", + "5775 cSLE Recruiting No Results Available \n", + "5776 NaN Recruiting No Results Available \n", + "5777 NaN Recruiting No Results Available \n", + "5778 NaN Recruiting No Results Available \n", + "5779 NaN Not yet recruiting No Results Available \n", + "5780 NaN Recruiting No Results Available \n", + "5781 MindMyMindFU Recruiting No Results Available \n", + "5782 NaN Not yet recruiting No Results Available \n", + "\n", + " Conditions \\\n", + "5753 Infertility|Infertility, Female|Infertility, Male \n", + "5754 Multiple Sclerosis \n", + "5755 Complications; Cesarean Section \n", + "5756 Recurrent Respiratory Papillomatosis|Human Pap... \n", + "5757 Influenza, Human \n", + "5758 Immune Health \n", + "5759 Stroke \n", + "5760 Traumatic Brain Injury \n", + "5761 Exercise|Lung Transplantation \n", + "5762 Parkinson Disease \n", + "5763 Neck Pain|Low Back Pain|Back Pain \n", + "5764 Overweight|Obesity \n", + "5765 HIV Infections|Pregnancy Related|STI|Mental He... \n", + "5766 Allergic to House Dust Mites \n", + "5767 Postpartum Health|Breastfeeding|Contraception|... \n", + "5768 Dysfunctional Breathing|Asthma in Children|Asthma \n", + "5769 Type 1 Diabetes \n", + "5770 Depression|Problematic Alcohol Use \n", + "5771 Preterm Labor|Preterm Birth \n", + "5772 Depression \n", + "5773 Periodontal Diseases|Periodontal Pocket \n", + "5774 Psychotic Disorders|Psychosis|Sleep \n", + "5775 Systemic Lupus Erythematosus \n", + "5776 Stress \n", + "5777 Distress|PTSD|Anxiety|Depression|Trauma|Functi... \n", + "5778 Alcohol Drinking|Telemedicine \n", + "5779 Community Acquired Pneumonia in Children|Antib... \n", + "5780 Stress|Psychological \n", + "5781 Emotional Problem|Anxiety Disorder of Childhoo... \n", + "5782 Chronic Pain \n", + "\n", + " Interventions \\\n", + "5753 Device: Insemination with the Mosie Baby Kit \n", + "5754 NaN \n", + "5755 Procedure: Vaginal Cleansing|Procedure: Abdomi... \n", + "5756 Procedure: Microdebrider|Procedure: Cold-steel... \n", + "5757 Biological: FluBlok|Other: Placebo \n", + "5758 Dietary Supplement: Control: Placebo|Dietary S... \n", + "5759 Behavioral: Group Lifestyle Balance|Other: Usu... \n", + "5760 Behavioral: Group Lifestyle Balance™|Other: At... \n", + "5761 Behavioral: LTGO-Home Based Exercise|Behaviora... \n", + "5762 NaN \n", + "5763 Other: Exercise program \n", + "5764 Behavioral: CCTP|Behavioral: Nutritional couns... \n", + "5765 Behavioral: SKILLZ-Girl Enhanced football curr... \n", + "5766 Biological: ASP2390|Biological: Placebo \n", + "5767 Behavioral: Patient Navigation Program \n", + "5768 NaN \n", + "5769 Other: Team Clinic Care|Other: Standard Care \n", + "5770 Behavioral: Laddr \n", + "5771 Other: Multimodal intervention strategy \n", + "5772 Behavioral: Supportive Therapy SSI|Behavioral:... \n", + "5773 NaN \n", + "5774 Device: Sleepio \n", + "5775 Behavioral: TEACH \n", + "5776 Behavioral: Mindfulness Training \n", + "5777 Behavioral: Problem Management Plus \n", + "5778 Behavioral: A-CHESS self-monitored|Behavioral:... \n", + "5779 Other: Antibiotic treatment|Other: No antibiot... \n", + "5780 Behavioral: Yoga \n", + "5781 NaN \n", + "5782 Behavioral: Brief Cognitive Behavioral Therapy... \n", + "\n", + " Outcome Measures \\\n", + "5753 Pregnancy rate|Pregnancy rate differences betw... \n", + "5754 Patient-determined Disease Steps (PDDS) Scale ... \n", + "5755 Composite Postoperative Infectious Morbidity|A... \n", + "5756 Time interval between surgical interventions|R... \n", + "5757 Immune response to vaccination (4-fold rise in... \n", + "5758 Number of Respiratory Tract Infections|Severit... \n", + "5759 Change in weight|Physical Activity|Arm Circumf... \n", + "5760 Change in weight|Step Count|Waist and Arm Circ... \n", + "5761 Physical function-Walking, Change is being Ass... \n", + "5762 Association between exercise patterns and qual... \n", + "5763 neck pain|low back pain|behavioral change|phys... \n", + "5764 Body weight at 6 months|Body weight|Culinary a... \n", + "5765 Number of participants undergoing HIV testing ... \n", + "5766 Number of participants with Adverse Events (AE... \n", + "5767 Postpartum health at 4-12 weeks after delivery... \n", + "5768 Prevalence of children with dysfunctional brea... \n", + "5769 Hemoglobin A1C at Baseline|Hemoglobin A1C Prog... \n", + "5770 The Integrated Measure of Implementation Conte... \n", + "5771 Neonatal morbidity/mortality|Length of neonata... \n", + "5772 Change in adolescent depressive symptom severi... \n", + "5773 Probing depth|Bleeding on probing|Tooth Loss|O... \n", + "5774 Change from baseline Work and Social Adjustmen... \n", + "5775 Recruitment rates of the study|Retention rates... \n", + "5776 Mean differences in Psychological Threat measu... \n", + "5777 Change in psychological distress|Change in pos... \n", + "5778 Number of risky drinking days|Number of patien... \n", + "5779 Antibiotic treatment rates in hospitalized chi... \n", + "5780 Perceived Stress|Task switching paradigm|Digit... \n", + "5781 The child's impact of mental health problems r... \n", + "5782 Defense and Veterans Pain Rating Scale (DVPRS)... \n", + "\n", + " Sponsor/Collaborators ... \\\n", + "5753 Mosie Baby ... \n", + "5754 EMD Serono Research & Development Institute, I... ... \n", + "5755 Washington University School of Medicine ... \n", + "5756 Newcastle-upon-Tyne Hospitals NHS Trust|Alder ... ... \n", + "5757 The University of Hong Kong|University of Chic... ... \n", + "5758 Mead Johnson Nutrition ... \n", + "5759 Baylor Research Institute|National Institute o... ... \n", + "5760 Baylor Research Institute|National Institute o... ... \n", + "5761 University of Pittsburgh|National Institute of... ... \n", + "5762 Washington University School of Medicine|Ameri... ... \n", + "5763 University Ghent ... \n", + "5764 Spaulding Rehabilitation Hospital|Sheba Medica... ... \n", + "5765 University of Alabama at Birmingham|University... ... \n", + "5766 Astellas Pharma Global Development, Inc.|Astel... ... \n", + "5767 Northwestern University|Eunice Kennedy Shriver... ... \n", + "5768 Kolding Sygehus|Odense Patient Data Explorativ... ... \n", + "5769 Children's Hospital Los Angeles|University of ... ... \n", + "5770 Dartmouth-Hitchcock Medical Center|Pontificia ... ... \n", + "5771 Sera Prognostics, Inc.|High Risk Pregnancy Cen... ... \n", + "5772 Stony Brook University ... \n", + "5773 Brock University|Dr. Peter C. Fritz, Periodont... ... \n", + "5774 University of Cambridge|Cambridgeshire and Pet... ... \n", + "5775 Michigan State University|Arthritis Foundation... ... \n", + "5776 University of Pittsburgh|U.S. National Science... ... \n", + "5777 University of Zurich ... \n", + "5778 University of Wisconsin, Madison|National Inst... ... \n", + "5779 ARCIM Institute Academic Research in Complemen... ... \n", + "5780 University of Illinois at Urbana-Champaign ... \n", + "5781 Mental Health Services in the Capital Region, ... ... \n", + "5782 The University of Texas Health Science Center ... ... \n", + "\n", + " Other IDs Start Date \\\n", + "5753 Mosie-001 August 5, 2020 \n", + "5754 MS200077_0021 April 7, 2021 \n", + "5755 201505127 August 2015 \n", + "5756 8733 April 1, 2018 \n", + "5757 BJC033|1U01AI153700 October 5, 2020 \n", + "5758 3393-1 December 28, 2020 \n", + "5759 BSWRI 018-714 July 8, 2019 \n", + "5760 BSWRI 017-482 January 8, 2019 \n", + "5761 PRO18030496 (STUDY19020357)|1R01NR017196-01A1 April 1, 2019 \n", + "5762 202002075 February 19, 2021 \n", + "5763 BC-08635 November 12, 2020 \n", + "5764 2018P002115 May 20, 2019 \n", + "5765 IRB-300002251 March 1, 2020 \n", + "5766 2390-CL-0001|2018-004678-83 October 7, 2020 \n", + "5767 STU002096009|1R01HD098178 January 21, 2020 \n", + "5768 S-2020-0101-a February 5, 2021 \n", + "5769 CHLA-19-00062 March 1, 2021 \n", + "5770 1U19MH109988 D18019|1U19MH109988 February 13, 2018 \n", + "5771 SP019 November 6, 2020 \n", + "5772 IRB2019-00382 February 15, 2021 \n", + "5773 20-070 November 25, 2020 \n", + "5774 M00915|RNAG-521|224101|19/EE/0352 October 30, 2020 \n", + "5775 STUDY00003882 August 4, 2020 \n", + "5776 STUDY19050258 October 26, 2020 \n", + "5777 BASEC-2017-01175-rct August 25, 2020 \n", + "5778 2019-0337|R01AA024150|A532007|SMPH/FAMILY MED/... March 23, 2020 \n", + "5779 PKA-03 April 2021 \n", + "5780 21584 February 10, 2021 \n", + "5781 MHSCRDenmark, F-61502-03-1 March 22, 2021 \n", + "5782 HSC20200520H February 2021 \n", + "\n", + " Primary Completion Date Completion Date First Posted \\\n", + "5753 August 31, 2020 September 1, 2020 November 27, 2020 \n", + "5754 January 31, 2024 January 31, 2024 February 3, 2021 \n", + "5755 January 2021 June 2021 July 13, 2015 \n", + "5756 August 31, 2021 August 31, 2021 March 14, 2018 \n", + "5757 December 31, 2024 December 31, 2024 October 6, 2020 \n", + "5758 April 30, 2021 April 30, 2021 January 19, 2021 \n", + "5759 June 30, 2021 September 30, 2021 March 13, 2019 \n", + "5760 December 30, 2021 September 30, 2022 July 20, 2018 \n", + "5761 March 31, 2022 March 31, 2022 November 2, 2018 \n", + "5762 February 18, 2024 February 18, 2025 March 4, 2021 \n", + "5763 November 30, 2028 December 31, 2028 March 5, 2021 \n", + "5764 February 2022 February 2022 January 30, 2019 \n", + "5765 March 30, 2023 June 30, 2023 June 11, 2020 \n", + "5766 August 31, 2026 August 31, 2026 December 4, 2019 \n", + "5767 January 2024 December 2024 April 19, 2019 \n", + "5768 December 2022 December 2022 February 2, 2021 \n", + "5769 March 30, 2022 December 30, 2022 December 9, 2019 \n", + "5770 April 2021 April 2021 January 8, 2018 \n", + "5771 September 30, 2022 June 30, 2025 March 10, 2020 \n", + "5772 July 2024 August 2024 October 29, 2020 \n", + "5773 August 2021 December 2021 November 20, 2020 \n", + "5774 November 30, 2022 November 30, 2022 November 27, 2019 \n", + "5775 December 2021 December 2021 April 6, 2020 \n", + "5776 August 31, 2022 December 31, 2022 October 19, 2020 \n", + "5777 June 2022 June 2022 October 5, 2020 \n", + "5778 August 2022 April 2023 July 8, 2019 \n", + "5779 November 2024 December 2024 December 23, 2020 \n", + "5780 July 2021 July 2021 February 5, 2021 \n", + "5781 May 31, 2022 December 31, 2022 March 18, 2021 \n", + "5782 February 2024 February 2025 December 22, 2020 \n", + "\n", + " Results First Posted Last Update Posted \\\n", + "5753 NaN December 1, 2020 \n", + "5754 NaN March 5, 2021 \n", + "5755 NaN February 11, 2021 \n", + "5756 NaN September 9, 2020 \n", + "5757 NaN November 5, 2020 \n", + "5758 NaN January 19, 2021 \n", + "5759 NaN September 23, 2020 \n", + "5760 NaN September 23, 2020 \n", + "5761 NaN February 11, 2021 \n", + "5762 NaN March 4, 2021 \n", + "5763 NaN March 5, 2021 \n", + "5764 NaN May 22, 2020 \n", + "5765 NaN June 11, 2020 \n", + "5766 NaN March 23, 2021 \n", + "5767 NaN March 3, 2021 \n", + "5768 NaN February 9, 2021 \n", + "5769 NaN March 1, 2021 \n", + "5770 NaN December 19, 2020 \n", + "5771 NaN April 14, 2021 \n", + "5772 NaN March 17, 2021 \n", + "5773 NaN December 4, 2020 \n", + "5774 NaN November 3, 2020 \n", + "5775 NaN March 9, 2021 \n", + "5776 NaN November 10, 2020 \n", + "5777 NaN October 5, 2020 \n", + "5778 NaN April 2, 2021 \n", + "5779 NaN February 10, 2021 \n", + "5780 NaN February 24, 2021 \n", + "5781 NaN April 1, 2021 \n", + "5782 NaN December 22, 2020 \n", + "\n", + " Locations \\\n", + "5753 Virtual Study - Online Data Collection, Austin... \n", + "5754 Georgetown University, Washington, District of... \n", + "5755 Barnes Jewish Hospital, Saint Louis, Missouri,... \n", + "5756 NHS Grampian, Aberdeen, United Kingdom|Betsi C... \n", + "5757 The University of Hong Kong, Hong Kong, Hong Kong \n", + "5758 VA Greater Los Angeles Healthcare System, Los ... \n", + "5759 Baylor Scott & White Institute for Rehabilitat... \n", + "5760 Baylor Scott & White Institute for Rehabilitat... \n", + "5761 University of Pittsburgh, School of Nursing, P... \n", + "5762 Washington University School of Medicine, Sain... \n", + "5763 Ghent University, Ghent, East Flanders, Belgium \n", + "5764 Spaulding Rehabilitation hospitle, Charlestown... \n", + "5765 Centre of Infectious Disease Research Zambia, ... \n", + "5766 Site DE49001, Berlin, Germany|Site DE49002, Ha... \n", + "5767 Northwestern Memorial Hospital, Chicago, Illin... \n", + "5768 Kolding Sygehus, Kolding, Denmark \n", + "5769 Children's Hospital Los Angeles, Los Angeles, ... \n", + "5770 Salud de Tundama, Duitama, Boyaca, Colombia|Em... \n", + "5771 High Risk Pregnancy Center, Las Vegas, Nevada,... \n", + "5772 Stony Brook University, Stony Brook, New York,... \n", + "5773 Dr. Peter C. Fritz, Periodontal Wellness & Imp... \n", + "5774 Cameo Early Intervention, Cambridge, Cambridge... \n", + "5775 Michigan State University, Grand Rapids, Michi... \n", + "5776 University of Pittsburgh, Pittsburgh, Pennsylv... \n", + "5777 Klinik für Konsiliarpsychiatrie und Psychosoma... \n", + "5778 UW Health at the American Center, Madison, Wis... \n", + "5779 Die Filderklinik, Filderstadt, Baden-Württembe... \n", + "5780 University of Illinois at Urbana-Champaign, Ur... \n", + "5781 Mental Health Services in the Capital Region, ... \n", + "5782 Uniformed Services University for the Health S... \n", + "\n", + " Study Documents \\\n", + "5753 NaN \n", + "5754 NaN \n", + "5755 NaN \n", + "5756 NaN \n", + "5757 NaN \n", + "5758 NaN \n", + "5759 NaN \n", + "5760 NaN \n", + "5761 NaN \n", + "5762 NaN \n", + "5763 NaN \n", + "5764 NaN \n", + "5765 \"Study Protocol and Statistical Analysis Plan\"... \n", + "5766 NaN \n", + "5767 NaN \n", + "5768 NaN \n", + "5769 NaN \n", + "5770 \"Informed Consent Form: Patient Informed Conse... \n", + "5771 NaN \n", + "5772 NaN \n", + "5773 NaN \n", + "5774 NaN \n", + "5775 NaN \n", + "5776 NaN \n", + "5777 NaN \n", + "5778 NaN \n", + "5779 NaN \n", + "5780 NaN \n", + "5781 NaN \n", + "5782 NaN \n", + "\n", + " URL \n", + "5753 https://ClinicalTrials.gov/show/NCT04646291 \n", + "5754 https://ClinicalTrials.gov/show/NCT04735406 \n", + "5755 https://ClinicalTrials.gov/show/NCT02495753 \n", + "5756 https://ClinicalTrials.gov/show/NCT03465280 \n", + "5757 https://ClinicalTrials.gov/show/NCT04576377 \n", + "5758 https://ClinicalTrials.gov/show/NCT04713735 \n", + "5759 https://ClinicalTrials.gov/show/NCT03873467 \n", + "5760 https://ClinicalTrials.gov/show/NCT03594734 \n", + "5761 https://ClinicalTrials.gov/show/NCT03728257 \n", + "5762 https://ClinicalTrials.gov/show/NCT04782518 \n", + "5763 https://ClinicalTrials.gov/show/NCT04783454 \n", + "5764 https://ClinicalTrials.gov/show/NCT03823469 \n", + "5765 https://ClinicalTrials.gov/show/NCT04429061 \n", + "5766 https://ClinicalTrials.gov/show/NCT04184895 \n", + "5767 https://ClinicalTrials.gov/show/NCT03922334 \n", + "5768 https://ClinicalTrials.gov/show/NCT04734795 \n", + "5769 https://ClinicalTrials.gov/show/NCT04190368 \n", + "5770 https://ClinicalTrials.gov/show/NCT03392883 \n", + "5771 https://ClinicalTrials.gov/show/NCT04301518 \n", + "5772 https://ClinicalTrials.gov/show/NCT04607902 \n", + "5773 https://ClinicalTrials.gov/show/NCT04639661 \n", + "5774 https://ClinicalTrials.gov/show/NCT04180709 \n", + "5775 https://ClinicalTrials.gov/show/NCT04335643 \n", + "5776 https://ClinicalTrials.gov/show/NCT04589377 \n", + "5777 https://ClinicalTrials.gov/show/NCT04574466 \n", + "5778 https://ClinicalTrials.gov/show/NCT04011644 \n", + "5779 https://ClinicalTrials.gov/show/NCT04681339 \n", + "5780 https://ClinicalTrials.gov/show/NCT04740229 \n", + "5781 https://ClinicalTrials.gov/show/NCT04804917 \n", + "5782 https://ClinicalTrials.gov/show/NCT04680000 \n", + "\n", + "[30 rows x 27 columns]\n" + ] + } + ], + "source": [ + "print(\"First 30 rows:\", df.tail(30))" + ] + }, + { + "cell_type": "markdown", + "id": "3081d7f4", + "metadata": {}, + "source": [ + "#### Q2. Extract and document key insights by exploring column data types and applying info and describe and write down your insights inside the markdown." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "44bca453", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 5783 entries, 0 to 5782\n", + "Data columns (total 27 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Rank 5783 non-null int64 \n", + " 1 NCT Number 5783 non-null object \n", + " 2 Title 5783 non-null object \n", + " 3 Acronym 2480 non-null object \n", + " 4 Status 5783 non-null object \n", + " 5 Study Results 5783 non-null object \n", + " 6 Conditions 5783 non-null object \n", + " 7 Interventions 4897 non-null object \n", + " 8 Outcome Measures 5748 non-null object \n", + " 9 Sponsor/Collaborators 5783 non-null object \n", + " 10 Gender 5773 non-null object \n", + " 11 Age 5783 non-null object \n", + " 12 Phases 3322 non-null object \n", + " 13 Enrollment 5749 non-null float64\n", + " 14 Funded Bys 5783 non-null object \n", + " 15 Study Type 5783 non-null object \n", + " 16 Study Designs 5748 non-null object \n", + " 17 Other IDs 5782 non-null object \n", + " 18 Start Date 5749 non-null object \n", + " 19 Primary Completion Date 5747 non-null object \n", + " 20 Completion Date 5747 non-null object \n", + " 21 First Posted 5783 non-null object \n", + " 22 Results First Posted 36 non-null object \n", + " 23 Last Update Posted 5783 non-null object \n", + " 24 Locations 5197 non-null object \n", + " 25 Study Documents 182 non-null object \n", + " 26 URL 5783 non-null object \n", + "dtypes: float64(1), int64(1), object(25)\n", + "memory usage: 1.2+ MB\n" + ] + } + ], + "source": [ + "column_info = df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "f134ca37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Rank</th>\n", + " <th>Enrollment</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>count</th>\n", + " <td>5783.000000</td>\n", + " <td>5.749000e+03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>mean</th>\n", + " <td>2892.000000</td>\n", + " <td>1.831949e+04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>std</th>\n", + " <td>1669.552635</td>\n", + " <td>4.045437e+05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>min</th>\n", + " <td>1.000000</td>\n", + " <td>0.000000e+00</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25%</th>\n", + " <td>1446.500000</td>\n", + " <td>6.000000e+01</td>\n", + " </tr>\n", + " <tr>\n", + " <th>50%</th>\n", + " <td>2892.000000</td>\n", + " <td>1.700000e+02</td>\n", + " </tr>\n", + " <tr>\n", + " <th>75%</th>\n", + " <td>4337.500000</td>\n", + " <td>5.600000e+02</td>\n", + " </tr>\n", + " <tr>\n", + " <th>max</th>\n", + " <td>5783.000000</td>\n", + " <td>2.000000e+07</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Rank Enrollment\n", + "count 5783.000000 5.749000e+03\n", + "mean 2892.000000 1.831949e+04\n", + "std 1669.552635 4.045437e+05\n", + "min 1.000000 0.000000e+00\n", + "25% 1446.500000 6.000000e+01\n", + "50% 2892.000000 1.700000e+02\n", + "75% 4337.500000 5.600000e+02\n", + "max 5783.000000 2.000000e+07" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "descriptive_stats = df.describe()\n", + "descriptive_stats" + ] + }, + { + "cell_type": "markdown", + "id": "4cdc469b", + "metadata": {}, + "source": [ + "# Dataset Insights" + ] + }, + { + "cell_type": "markdown", + "id": "9b23ecb5", + "metadata": {}, + "source": [ + "- The dataset contains 5783 entries with 27 columns.\n", + "- Data types include one float64, one int64, and 25 object types.\n", + "- Rank and Enrollment are the only numerical columns with non-null values across all entries.\n", + "- Acronym, Interventions, Phases, Locations, and Study Documents have a significant number of missing values.\n", + "- Results First Posted has few non-null entries, suggesting that most studies do not have results posted.\n", + "- The average enrollment size is 18,319, which is skewed by a few large studies, as indicated by a large standard deviation of 404,543.\n", + "- The median enrollment size is 170, suggesting that half of the studies have enrollments less than or equal to 170, which is much lower than the mean.\n", + "- The maximum enrollment size is 20,000,000, which is an outlier compared to the rest of the data." + ] + }, + { + "cell_type": "markdown", + "id": "9b490f2d", + "metadata": {}, + "source": [ + "# Q3. Loc and iloc" + ] + }, + { + "cell_type": "markdown", + "id": "001b8678", + "metadata": {}, + "source": [ + "- Select all columns for the first clinical trial in the dataset.\n", + "- Retrieve the Title and Status of the clinical trial with the NCT Number 'NCT04595136'.\n", + "- Get the Sponsor/Collaborators and Start Date for clinical trials that are Recruiting.\n", + "- Select the first 5 rows and columns Title, Conditions, and Outcome Measures.\n", + "- Find the Completion Date and URL for the last 3 clinical trials in the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "f3e5a9f4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Rank 1\n", + "NCT Number NCT04785898\n", + "Title Diagnostic Performance of the ID Now™ COVID-19...\n", + "Acronym COVID-IDNow\n", + "Status Active, not recruiting\n", + "Study Results No Results Available\n", + "Conditions Covid19\n", + "Interventions Diagnostic Test: ID Now™ COVID-19 Screening Test\n", + "Outcome Measures Evaluate the diagnostic performance of the ID ...\n", + "Sponsor/Collaborators Groupe Hospitalier Paris Saint Joseph\n", + "Gender All\n", + "Age 18 Years and older (Adult, Older Adult)\n", + "Phases Not Applicable\n", + "Enrollment 1000.0\n", + "Funded Bys Other\n", + "Study Type Interventional\n", + "Study Designs Allocation: N/A|Intervention Model: Single Gro...\n", + "Other IDs COVID-IDNow\n", + "Start Date November 9, 2020\n", + "Primary Completion Date December 22, 2020\n", + "Completion Date April 30, 2021\n", + "First Posted March 8, 2021\n", + "Results First Posted NaN\n", + "Last Update Posted March 8, 2021\n", + "Locations Groupe Hospitalier Paris Saint-Joseph, Paris, ...\n", + "Study Documents NaN\n", + "URL https://ClinicalTrials.gov/show/NCT04785898\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "first_clinical_trial = df.iloc[0]\n", + "first_clinical_trial" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "2175b33a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Title</th>\n", + " <th>Status</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Study to Evaluate the Efficacy of COVID19-0001...</td>\n", + " <td>Not yet recruiting</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Title Status\n", + "1 Study to Evaluate the Efficacy of COVID19-0001... Not yet recruiting" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "title_status = df.loc[df['NCT Number'] == 'NCT04595136', ['Title', 'Status']]\n", + "title_status" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "5663d0e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Sponsor/Collaborators</th>\n", + " <th>Start Date</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>University of Milano Bicocca</td>\n", + " <td>May 7, 2020</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Centre Hospitalier Régional d'Orléans|Centre d...</td>\n", + " <td>May 5, 2020</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>Fundacion Arturo Lopez Perez|Confederación de ...</td>\n", + " <td>April 7, 2020</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>The Christ Hospital</td>\n", + " <td>April 28, 2020</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>University of Milano Bicocca</td>\n", + " <td>April 16, 2020</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5776</th>\n", + " <td>University of Pittsburgh|U.S. National Science...</td>\n", + " <td>October 26, 2020</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5777</th>\n", + " <td>University of Zurich</td>\n", + " <td>August 25, 2020</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5778</th>\n", + " <td>University of Wisconsin, Madison|National Inst...</td>\n", + " <td>March 23, 2020</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5780</th>\n", + " <td>University of Illinois at Urbana-Champaign</td>\n", + " <td>February 10, 2021</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5781</th>\n", + " <td>Mental Health Services in the Capital Region, ...</td>\n", + " <td>March 22, 2021</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>2805 rows × 2 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Sponsor/Collaborators Start Date\n", + "2 University of Milano Bicocca May 7, 2020\n", + "4 Centre Hospitalier Régional d'Orléans|Centre d... May 5, 2020\n", + "9 Fundacion Arturo Lopez Perez|Confederación de ... April 7, 2020\n", + "10 The Christ Hospital April 28, 2020\n", + "11 University of Milano Bicocca April 16, 2020\n", + "... ... ...\n", + "5776 University of Pittsburgh|U.S. National Science... October 26, 2020\n", + "5777 University of Zurich August 25, 2020\n", + "5778 University of Wisconsin, Madison|National Inst... March 23, 2020\n", + "5780 University of Illinois at Urbana-Champaign February 10, 2021\n", + "5781 Mental Health Services in the Capital Region, ... March 22, 2021\n", + "\n", + "[2805 rows x 2 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "recruiting_info = df.loc[df['Status'] == 'Recruiting', ['Sponsor/Collaborators', 'Start Date']]\n", + "recruiting_info" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "e513680b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Title</th>\n", + " <th>Conditions</th>\n", + " <th>Outcome Measures</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Diagnostic Performance of the ID Now™ COVID-19...</td>\n", + " <td>Covid19</td>\n", + " <td>Evaluate the diagnostic performance of the ID ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Study to Evaluate the Efficacy of COVID19-0001...</td>\n", + " <td>SARS-CoV-2 Infection</td>\n", + " <td>Change on viral load results from baseline aft...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Lung CT Scan Analysis of SARS-CoV2 Induced Lun...</td>\n", + " <td>covid19</td>\n", + " <td>A qualitative analysis of parenchymal lung dam...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>The Role of a Private Hospital in Hong Kong Am...</td>\n", + " <td>COVID</td>\n", + " <td>Proportion of asymptomatic subjects|Proportion...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Maternal-foetal Transmission of SARS-Cov-2</td>\n", + " <td>Maternal Fetal Infection Transmission|COVID-19...</td>\n", + " <td>COVID-19 by positive PCR in cord blood and / o...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Title \\\n", + "0 Diagnostic Performance of the ID Now™ COVID-19... \n", + "1 Study to Evaluate the Efficacy of COVID19-0001... \n", + "2 Lung CT Scan Analysis of SARS-CoV2 Induced Lun... \n", + "3 The Role of a Private Hospital in Hong Kong Am... \n", + "4 Maternal-foetal Transmission of SARS-Cov-2 \n", + "\n", + " Conditions \\\n", + "0 Covid19 \n", + "1 SARS-CoV-2 Infection \n", + "2 covid19 \n", + "3 COVID \n", + "4 Maternal Fetal Infection Transmission|COVID-19... \n", + "\n", + " Outcome Measures \n", + "0 Evaluate the diagnostic performance of the ID ... \n", + "1 Change on viral load results from baseline aft... \n", + "2 A qualitative analysis of parenchymal lung dam... \n", + "3 Proportion of asymptomatic subjects|Proportion... \n", + "4 COVID-19 by positive PCR in cord blood and / o... " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "first_5_trials = df.loc[0:4, ['Title', 'Conditions', 'Outcome Measures']]\n", + "first_5_trials" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "4e591b9e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Completion Date</th>\n", + " <th>First Posted</th>\n", + " <th>Results First Posted</th>\n", + " <th>Last Update Posted</th>\n", + " <th>Locations</th>\n", + " <th>Study Documents</th>\n", + " <th>URL</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>5780</th>\n", + " <td>July 2021</td>\n", + " <td>February 5, 2021</td>\n", + " <td>NaN</td>\n", + " <td>February 24, 2021</td>\n", + " <td>University of Illinois at Urbana-Champaign, Ur...</td>\n", + " <td>NaN</td>\n", + " <td>https://ClinicalTrials.gov/show/NCT04740229</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5781</th>\n", + " <td>December 31, 2022</td>\n", + " <td>March 18, 2021</td>\n", + " <td>NaN</td>\n", + " <td>April 1, 2021</td>\n", + " <td>Mental Health Services in the Capital Region, ...</td>\n", + " <td>NaN</td>\n", + " <td>https://ClinicalTrials.gov/show/NCT04804917</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5782</th>\n", + " <td>February 2025</td>\n", + " <td>December 22, 2020</td>\n", + " <td>NaN</td>\n", + " <td>December 22, 2020</td>\n", + " <td>Uniformed Services University for the Health S...</td>\n", + " <td>NaN</td>\n", + " <td>https://ClinicalTrials.gov/show/NCT04680000</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Completion Date First Posted Results First Posted \\\n", + "5780 July 2021 February 5, 2021 NaN \n", + "5781 December 31, 2022 March 18, 2021 NaN \n", + "5782 February 2025 December 22, 2020 NaN \n", + "\n", + " Last Update Posted Locations \\\n", + "5780 February 24, 2021 University of Illinois at Urbana-Champaign, Ur... \n", + "5781 April 1, 2021 Mental Health Services in the Capital Region, ... \n", + "5782 December 22, 2020 Uniformed Services University for the Health S... \n", + "\n", + " Study Documents URL \n", + "5780 NaN https://ClinicalTrials.gov/show/NCT04740229 \n", + "5781 NaN https://ClinicalTrials.gov/show/NCT04804917 \n", + "5782 NaN https://ClinicalTrials.gov/show/NCT04680000 " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "last_3_trials = df.iloc[-3:, df.columns.get_loc('Completion Date'):df.columns.get_loc('URL')+1]\n", + "last_3_trials" + ] + }, + { + "cell_type": "markdown", + "id": "5f9c3278", + "metadata": {}, + "source": [ + "#### Q4. Determine the missing values in the whole dataset and analyze missing values in each column." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "54d4ade0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total missing values in the dataset: 18805\n" + ] + } + ], + "source": [ + "total_missing_values = df.isnull().sum().sum()\n", + "print(f\"Total missing values in the dataset: {total_missing_values}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "7428b84c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Missing values in each column:\n", + "Rank 0\n", + "NCT Number 0\n", + "Title 0\n", + "Acronym 3303\n", + "Status 0\n", + "Study Results 0\n", + "Conditions 0\n", + "Interventions 886\n", + "Outcome Measures 35\n", + "Sponsor/Collaborators 0\n", + "Gender 10\n", + "Age 0\n", + "Phases 2461\n", + "Enrollment 34\n", + "Funded Bys 0\n", + "Study Type 0\n", + "Study Designs 35\n", + "Other IDs 1\n", + "Start Date 34\n", + "Primary Completion Date 36\n", + "Completion Date 36\n", + "First Posted 0\n", + "Results First Posted 5747\n", + "Last Update Posted 0\n", + "Locations 586\n", + "Study Documents 5601\n", + "URL 0\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "missing_values_per_column = df.isnull().sum()\n", + "print(\"Missing values in each column:\")\n", + "print(missing_values_per_column)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "51075ffa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Percentage of missing values in each column:\n", + "Rank 0.000000\n", + "NCT Number 0.000000\n", + "Title 0.000000\n", + "Acronym 57.115684\n", + "Status 0.000000\n", + "Study Results 0.000000\n", + "Conditions 0.000000\n", + "Interventions 15.320768\n", + "Outcome Measures 0.605222\n", + "Sponsor/Collaborators 0.000000\n", + "Gender 0.172921\n", + "Age 0.000000\n", + "Phases 42.555767\n", + "Enrollment 0.587930\n", + "Funded Bys 0.000000\n", + "Study Type 0.000000\n", + "Study Designs 0.605222\n", + "Other IDs 0.017292\n", + "Start Date 0.587930\n", + "Primary Completion Date 0.622514\n", + "Completion Date 0.622514\n", + "First Posted 0.000000\n", + "Results First Posted 99.377486\n", + "Last Update Posted 0.000000\n", + "Locations 10.133149\n", + "Study Documents 96.852845\n", + "URL 0.000000\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "percentage_missing = (missing_values_per_column / len(df)) * 100\n", + "print(\"Percentage of missing values in each column:\")\n", + "print(percentage_missing)" + ] + }, + { + "cell_type": "markdown", + "id": "8a8943a5", + "metadata": {}, + "source": [ + "#### Q5. Calculate the sum of duplicate rows" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "cae78405", + "metadata": {}, + "outputs": [], + "source": [ + "duplicate_rows = df[df.duplicated()]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "2a29a019", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sum of duplicate rows for numeric columns:\n", + "Rank 0.0\n", + "Enrollment 0.0\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "sum_of_duplicates = duplicate_rows.sum(numeric_only=True)\n", + "sum_of_duplicates\n", + "\n", + "print(\"Sum of duplicate rows for numeric columns:\")\n", + "print(sum_of_duplicates)" + ] + }, + { + "cell_type": "markdown", + "id": "f33b43a2", + "metadata": {}, + "source": [ + "## Q6. Solve following question by using conditional statements" + ] + }, + { + "cell_type": "markdown", + "id": "1f8a1b35", + "metadata": {}, + "source": [ + "- How many studies have an enrollment greater than a certain threshold? \n", + "- How many clinical trials have 'No Results Available'? \n", + "- How many clinical trials are in an \"Completed\" and \"Recruiting\" status? \n", + "- How many clinical trials are related to 'COVID-19'? \n", + "- How many clinical trials started after January 1, 2020" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "1b517edb", + "metadata": {}, + "outputs": [], + "source": [ + "enrollment_threshold = 1000\n", + "studies_above_threshold = df[df['Enrollment'] > enrollment_threshold].shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "ae474e6a", + "metadata": {}, + "outputs": [], + "source": [ + "no_results_count = df[df['Study Results'] == 'No Results Available'].shape[0]\n", + "completed_count = df[df['Status'] == 'Completed'].shape[0]\n", + "recruiting_count = df[df['Status'] == 'Recruiting'].shape[0]\n", + "covid_trials_count = df[df['Conditions'].str.contains('COVID-19', case=False, na=False)].shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "f03f11cf", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "df['Start Date'] = pd.to_datetime(df['Start Date'], errors='coerce')\n", + "threshold_date = pd.to_datetime('2020-01-01')\n", + "\n", + "trials_started_after_2020 = df[df['Start Date'] > threshold_date].shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "57a80afc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Studies with enrollment greater than 1000: 911\n", + "Clinical trials with 'No Results Available': 5747\n", + "Clinical trials in 'Completed' status: 1025\n", + "Clinical trials in 'Recruiting' status: 2805\n", + "Clinical trials related to 'COVID-19': 1872\n", + "Clinical trials started after January 1, 2020: 5066\n" + ] + } + ], + "source": [ + "print(f\"Studies with enrollment greater than {enrollment_threshold}: {studies_above_threshold}\")\n", + "print(f\"Clinical trials with 'No Results Available': {no_results_count}\")\n", + "print(f\"Clinical trials in 'Completed' status: {completed_count}\")\n", + "print(f\"Clinical trials in 'Recruiting' status: {recruiting_count}\")\n", + "print(f\"Clinical trials related to 'COVID-19': {covid_trials_count}\")\n", + "print(f\"Clinical trials started after January 1, 2020: {trials_started_after_2020}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}