[8c54ae]: / notebooks / scraping.ipynb

Download this file

1028 lines (1027 with data), 46.4 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.support.ui import Select\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "from selenium.common.exceptions import TimeoutException\n",
    "import time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "//*[@id=\"adv-check-status\"]/div/div[2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Radio button for 'Recruiting and not yet recruiting studies' is detected and visible: True\n"
     ]
    }
   ],
   "source": [
    "driver = webdriver.Chrome()\n",
    "driver.set_window_size(1120, 1000)\n",
    "driver.get(\"https://clinicaltrials.gov\")\n",
    "\n",
    "try:\n",
    "    # Wait for the radio group container to be visible\n",
    "    WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"main-content\"]/ctg-home/div/div[2]/ctg-home-search-panel/div/div[2]/ctg-search-filters-form/div[2]'))\n",
    "    )\n",
    "\n",
    "    # Option 1: Wait for the radio button using ID\n",
    "    recruiting_radio_button = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"adv-check-status\"]/div/div[2]'))\n",
    "    )\n",
    "\n",
    "    # Option 2: Wait for the label and locate the radio button\n",
    "    # recruiting_radio_label = WebDriverWait(driver, 20).until(\n",
    "    #     EC.element_to_be_clickable((By.XPATH, \"//label[@for='adv-radio-status1']\"))\n",
    "    # )\n",
    "    # recruiting_radio_button = driver.find_element(By.XPATH, \"//input[@id='adv-radio-status1']\")\n",
    "\n",
    "    # Print if the radio button is displayed\n",
    "    print(\"Radio button for 'Recruiting and not yet recruiting studies' is detected and visible:\", recruiting_radio_button.is_displayed())\n",
    "    \n",
    "   # Scroll the page to bring the element into view before clicking\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView(true);\", recruiting_radio_button)\n",
    "\n",
    "    # Click the button after scrolling to it\n",
    "    recruiting_radio_button.click()\n",
    "\n",
    "\n",
    "except TimeoutException:\n",
    "    print(\"The radio button was not found or is not interactable.\")\n",
    "    print(driver.page_source)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recruiting option selected successfully.\n",
      "Search button clicked, navigating to the results page.\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "from selenium.common.exceptions import TimeoutException\n",
    "import time\n",
    "\n",
    "# Set up the driver and open the page\n",
    "driver = webdriver.Chrome()\n",
    "driver.set_window_size(1120, 1000)\n",
    "driver.get(\"https://clinicaltrials.gov\")\n",
    "\n",
    "try:\n",
    "    # Wait for the radio group container to be visible\n",
    "    WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"main-content\"]/ctg-home/div/div[2]/ctg-home-search-panel/div/div[2]/ctg-search-filters-form/div[2]'))\n",
    "    )\n",
    "\n",
    "    # Scroll down slowly to the radio button section\n",
    "    driver.execute_script(\"window.scrollBy(0, 600);\")  # Adjust scrolling value as needed\n",
    "    time.sleep(1)  # Give time for the scrolling action to be visually clear\n",
    "\n",
    "    # Locate and click the 'Recruiting and not yet recruiting studies' radio button\n",
    "    recruiting_radio_button = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"adv-check-status\"]/div/div[2]')) #//*[@id=\"adv-check-status\"]/div/div[2]\n",
    "        # //*[@id=\"adv-check-status\"]/div/div[2]\n",
    "    )\n",
    "\n",
    "    # Scroll the page to bring the radio button into view and click it\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView(true);\", recruiting_radio_button)\n",
    "    time.sleep(1)  # Pause before interacting\n",
    "\n",
    "    # Ensure the 'Recruiting and not yet recruiting' radio button is selected\n",
    "    recruiting_radio_button.click()\n",
    "    time.sleep(1)  # Wait a bit to confirm the selection\n",
    "    \n",
    "    # Optionally verify the selection\n",
    "    selected_status = driver.find_element(By.XPATH, '//*[@id=\"adv-radio-status1\"]').is_selected()\n",
    "    if selected_status:\n",
    "        print(\"Recruiting option selected successfully.\")\n",
    "    else:\n",
    "        print(\"Failed to select Recruiting option.\")\n",
    "\n",
    "    # Now, wait for the search button to be visible\n",
    "    search_button = WebDriverWait(driver, 20).until(\n",
    "        EC.element_to_be_clickable((By.XPATH, '//*[@id=\"main-content\"]/ctg-home/div/div[2]/ctg-home-search-panel/div/div[3]/div/div/button'))\n",
    "    )\n",
    "\n",
    "    # Scroll the page to bring the search button into view (just in case)\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView(true);\", search_button)\n",
    "    time.sleep(1)  # Wait a bit before clicking\n",
    "\n",
    "    # Click the search button\n",
    "    search_button.click()\n",
    "    print(\"Search button clicked, navigating to the results page.\")\n",
    "\n",
    "except TimeoutException:\n",
    "    print(\"The search button was not found or is not interactable.\")\n",
    "    print(driver.page_source)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recruiting option selected successfully.\n",
      "Search button clicked, navigating to the results page.\n",
      "'Not yet recruiting' checkbox was already unselected.\n",
      "An error occurred: Message: \n",
      "\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Set up the driver and open the main Clinical Trials page\n",
    "driver = webdriver.Chrome()\n",
    "driver.set_window_size(1120, 1000)\n",
    "driver.get(\"https://clinicaltrials.gov\")\n",
    "\n",
    "try:\n",
    "    # ----- FIRST PAGE -----\n",
    "    # Wait for the radio group container to be visible\n",
    "    WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"main-content\"]/ctg-home/div/div[2]/ctg-home-search-panel/div/div[2]/ctg-search-filters-form/div[2]'))\n",
    "    )\n",
    "\n",
    "    # Scroll down slowly to the radio button section\n",
    "    driver.execute_script(\"window.scrollBy(0, 600);\")  # Adjust scrolling value as needed\n",
    "    time.sleep(1)  # Give time for the scrolling action to be visually clear\n",
    "\n",
    "\n",
    "    # Locate and click the 'Recruiting and not yet recruiting studies' radio button\n",
    "    recruiting_radio_button = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"adv-check-status\"]/div/div[2]'))\n",
    "    )\n",
    "    \n",
    "    # Scroll the page to bring the radio button into view and click it\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView(true);\", recruiting_radio_button)\n",
    "    time.sleep(1)  # Pause before interacting\n",
    "    \n",
    "    # Ensure the 'Recruiting and not yet recruiting' radio button is selected\n",
    "    recruiting_radio_button.click()\n",
    "    time.sleep(1)  # Wait a bit to confirm the selection\n",
    "    \n",
    "    # Optionally verify the selection\n",
    "    selected_status = driver.find_element(By.XPATH, '//*[@id=\"adv-radio-status1\"]').is_selected()\n",
    "    if selected_status:\n",
    "        print(\"Recruiting option selected successfully.\")\n",
    "    else:\n",
    "        print(\"Failed to select Recruiting option.\")\n",
    "        \n",
    "    \n",
    "    # Now, wait for the search button to be visible\n",
    "    search_button = WebDriverWait(driver, 20).until(\n",
    "        EC.element_to_be_clickable((By.XPATH, '//*[@id=\"main-content\"]/ctg-home/div/div[2]/ctg-home-search-panel/div/div[3]/div/div/button'))\n",
    "    )\n",
    "    \n",
    "    # Scroll the page to bring the search button into view (just in case)\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView(true);\", search_button)\n",
    "    time.sleep(1)  # Wait a bit before clicking\n",
    "    \n",
    "    # Click the search button\n",
    "    search_button.click()\n",
    "    print(\"Search button clicked, navigating to the results page.\")\n",
    "\n",
    "   \n",
    "    # Wait for the second page to load\n",
    "    time.sleep(5)  # You may want to adjust this based on your internet speed\n",
    "\n",
    "    # ----- SECOND PAGE -----\n",
    "    # Wait for the filter container to be visible on the second page\n",
    "    filter_container = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"main-content\"]/ctg-search-results-page/div[2]/section/div[1]/ctg-focus-your-search-panel/div/div[2]/ctg-search-filters-form'))\n",
    "    )\n",
    "\n",
    "    # Scroll to the filter container\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView(true);\", filter_container)\n",
    "    time.sleep(1)\n",
    "\n",
    "    # Locate the \"Not yet recruiting\" checkbox and unselect it if it's selected\n",
    "    not_yet_recruiting_checkbox = WebDriverWait(driver, 20).until(\n",
    "        EC.element_to_be_clickable((By.XPATH, '//*[@id=\"adv-check-status\"]/div[2]/div[1]'))\n",
    "    )\n",
    "    \n",
    "    # Unselect the checkbox if it's selected\n",
    "    if not_yet_recruiting_checkbox.is_selected():\n",
    "        not_yet_recruiting_checkbox.click()\n",
    "        print(\"Unselected the 'Not yet recruiting' checkbox.\")\n",
    "    else:\n",
    "        print(\"'Not yet recruiting' checkbox was already unselected.\")\n",
    "\n",
    "    # Scroll to the Apply Filters button\n",
    "    apply_filters_button = WebDriverWait(driver, 20).until(\n",
    "        EC.element_to_be_clickable((By.XPATH, '//*[@id=\"apply-filters\"]'))\n",
    "    )\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView(true);\", apply_filters_button)\n",
    "    time.sleep(1)\n",
    "\n",
    "    # Click the Apply Filters button\n",
    "    apply_filters_button.click()\n",
    "    print(\"Clicked the 'Apply filters' button.\")\n",
    "\n",
    "except Exception as e:\n",
    "    print(\"An error occurred:\", e)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Waiting for the filter container to load...\n",
      "Filter container loaded successfully.\n",
      "Verifying the 'Not yet recruiting' checkbox...\n",
      "Checkbox 'Not yet recruiting' found.\n",
      "Checkbox is already unselected.\n",
      "Verifying the 'Apply Filters' button...\n",
      "An error occurred: Message: \n",
      "\n",
      "Script completed. Keeping the browser window open.\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Set up the driver and open the specific page directly\n",
    "driver = webdriver.Chrome()\n",
    "driver.set_window_size(1120, 1000)\n",
    "driver.get(\"https://clinicaltrials.gov/search?aggFilters=status:not%20rec\")\n",
    "\n",
    "try:\n",
    "    # ----- STEP 1: Verify filter container -----\n",
    "    print(\"Waiting for the filter container to load...\")\n",
    "    filter_container = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"main-content\"]/ctg-search-results-page/div[2]/section/div[1]/ctg-focus-your-search-panel/div/div[2]/ctg-search-filters-form'))\n",
    "    )\n",
    "    print(\"Filter container loaded successfully.\")\n",
    "    time.sleep(2)  # Pause for visual confirmation\n",
    "\n",
    "    # ----- STEP 2: Verify 'Not yet recruiting' checkbox -----\n",
    "    print(\"Verifying the 'Not yet recruiting' checkbox...\")\n",
    "    not_yet_recruiting_checkbox = WebDriverWait(driver, 20).until(\n",
    "        EC.presence_of_element_located((By.XPATH, '//*[@id=\"adv-check-status\"]/div[2]/div[1]'))\n",
    "    )\n",
    "    print(\"Checkbox 'Not yet recruiting' found.\")\n",
    "    time.sleep(2)  # Pause for visual confirmation\n",
    "\n",
    "    # Check if it's selected, and unselect if it is\n",
    "    if not_yet_recruiting_checkbox.is_selected():\n",
    "        print(\"Checkbox is selected, unselecting it...\")\n",
    "        not_yet_recruiting_checkbox.click()\n",
    "        print(\"Checkbox unselected successfully.\")\n",
    "    else:\n",
    "        print(\"Checkbox is already unselected.\")\n",
    "    time.sleep(2)  # Pause for visual confirmation\n",
    "\n",
    "    # ----- STEP 3: Verify 'Apply Filters' button -----\n",
    "    print(\"Verifying the 'Apply Filters' button...\")\n",
    "    apply_filters_button = WebDriverWait(driver, 20).until(\n",
    "        EC.element_to_be_clickable((By.XPATH, '//*[@id=\"apply-filters\"]'))\n",
    "    )\n",
    "    print(\"'Apply Filters' button is available.\")\n",
    "    time.sleep(2)  # Pause for visual confirmation\n",
    "\n",
    "    # ----- STEP 4: Click 'Apply Filters' -----\n",
    "    print(\"Clicking the 'Apply Filters' button...\")\n",
    "    apply_filters_button.click()\n",
    "    print(\"Clicked 'Apply Filters'.\")\n",
    "    time.sleep(2)  # Pause for visual confirmation of the results after applying the filter\n",
    "\n",
    "except Exception as e:\n",
    "    print(\"An error occurred:\", e)\n",
    "\n",
    "finally:\n",
    "    # Keep the browser open\n",
    "    print(\"Script completed. Keeping the browser window open.\")\n",
    "    input(\"Press Enter to close the browser window...\")\n",
    "    driver.quit()  # Only quits when you press Enter\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Filter container loaded successfully.\n",
      "Scrolling the filter container...\n",
      "Script completed. Keeping the browser window open.\n"
     ]
    }
   ],
   "source": [
    "# ----- FIRST PAGE -----\n",
    "# Wait for the filter group container to be visible\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Set up the driver\n",
    "driver = webdriver.Chrome()\n",
    "driver.set_window_size(1120, 1000)\n",
    "\n",
    "# Navigate to the page\n",
    "driver.get(\"https://clinicaltrials.gov/search?aggFilters=status:not%20rec\")\n",
    "\n",
    "try:\n",
    "    # Step 1: Wait for the filter container to load\n",
    "    filter_container = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"main-content\"]/ctg-search-results-page/div[2]/section/div[1]/ctg-focus-your-search-panel/div/div[2]/ctg-search-filters-form'))\n",
    "    )\n",
    "    print(\"Filter container loaded successfully.\")\n",
    "    time.sleep(2)\n",
    "\n",
    "    # Step 2: Scroll within the filter container using JavaScript\n",
    "    print(\"Scrolling the filter container...\")\n",
    "    \n",
    "    # Scroll by a specific amount (e.g., 500 pixels)\n",
    "    driver.execute_script(\"arguments[0].scrollTop = arguments[0].scrollTop + 500;\", filter_container)\n",
    "    \n",
    "    # Alternatively, you can scroll to the bottom of the container:\n",
    "    # driver.execute_script(\"arguments[0].scrollTop = arguments[0].scrollHeight;\", filter_container)\n",
    "    \n",
    "    time.sleep(2)  # Pause for visual confirmation\n",
    "\n",
    "except Exception as e:\n",
    "    print(\"An error occurred:\", e)\n",
    "\n",
    "finally:\n",
    "    # Keep the browser open\n",
    "    print(\"Script completed. Keeping the browser window open.\")\n",
    "    input(\"Press Enter to close the browser window...\")\n",
    "    driver.quit()\n",
    "  # Pause for visual confirmation\n",
    "\n",
    "\n",
    "# # Locate and click the 'Recruiting and not yet recruiting studies' radio button\n",
    "# recruiting_radio_button = WebDriverWait(driver, 20).until(\n",
    "#     EC.visibility_of_element_located((By.XPATH, '//*[@id=\"adv-check-status\"]/div/div[2]'))\n",
    "# )\n",
    "\n",
    "# # Scroll the page to bring the radio button into view and click it\n",
    "# driver.execute_script(\"arguments[0].scrollIntoView(true);\", recruiting_radio_button)\n",
    "# time.sleep(1)  # Pause before interacting\n",
    "\n",
    "# # Ensure the 'Recruiting and not yet recruiting' radio button is selected\n",
    "# recruiting_radio_button.click()\n",
    "# time.sleep(1)  # Wait a bit to confirm the selection\n",
    "\n",
    "# # Optionally verify the selection\n",
    "# selected_status = driver.find_element(By.XPATH, '//*[@id=\"adv-radio-status1\"]').is_selected()\n",
    "# if selected_status:\n",
    "#     print(\"Recruiting option selected successfully.\")\n",
    "# else:\n",
    "#     print(\"Failed to select Recruiting option.\")\n",
    "    \n",
    "\n",
    "# # Now, wait for the search button to be visible\n",
    "# search_button = WebDriverWait(driver, 20).until(\n",
    "#     EC.element_to_be_clickable((By.XPATH, '//*[@id=\"main-content\"]/ctg-home/div/div[2]/ctg-home-search-panel/div/div[3]/div/div/button'))\n",
    "# )\n",
    "\n",
    "# # Scroll the page to bring the search button into view (just in case)\n",
    "# driver.execute_script(\"arguments[0].scrollIntoView(true);\", search_button)\n",
    "# time.sleep(1)  # Wait a bit before clicking\n",
    "\n",
    "# # Click the search button\n",
    "# search_button.click()\n",
    "# print(\"Search button clicked, navigating to the results page.\")\n",
    "\n",
    "\n",
    "# # Wait for the second page to load\n",
    "# time.sleep(5)  # You may want to adjust this based on your internet speed\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Filter container loaded successfully.\n",
      "Script completed. Keeping the browser window open.\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Set up the driver\n",
    "driver = webdriver.Chrome()\n",
    "driver.set_window_size(1120, 1000)\n",
    "\n",
    "# Navigate to the page\n",
    "driver.get(\"https://clinicaltrials.gov/search?aggFilters=status:not%20rec\")\n",
    "\n",
    "try:\n",
    "    # Wait for the filter container to be visible\n",
    "    filter_container = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"main-content\"]/ctg-search-results-page/div[2]/section/div[1]/ctg-focus-your-search-panel/div/div[2]/ctg-search-filters-form'))\n",
    "    )\n",
    "    \n",
    "    print(\"Filter container loaded successfully.\")\n",
    "    \n",
    "    # Highlight the filter container by adding a red border around it\n",
    "    driver.execute_script(\"arguments[0].style.border='3px solid red'\", filter_container)\n",
    "    \n",
    "    # Pause to visually inspect the browser\n",
    "    time.sleep(5)  # Adjust this if needed for more time to inspect\n",
    "\n",
    "except Exception as e:\n",
    "    print(\"An error occurred:\", e)\n",
    "\n",
    "finally:\n",
    "    # Keep the browser open for visual confirmation\n",
    "    print(\"Script completed. Keeping the browser window open.\")\n",
    "    input(\"Press Enter to close the browser window...\")\n",
    "    driver.quit()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Not Yet Recruiting button loaded successfully.\n",
      "Script completed. Keeping the browser window open.\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Set up the driver\n",
    "driver = webdriver.Chrome()\n",
    "driver.set_window_size(1120, 1000)\n",
    "\n",
    "# Navigate to the page\n",
    "driver.get(\"https://clinicaltrials.gov/search?aggFilters=status:not%20rec\")\n",
    "\n",
    "try:\n",
    "    # Wait for the \"Not Yet Recruiting\" radio button to be visible\n",
    "    not_yet_recruiting_button = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"adv-check-status\"]/div[2]/div[1]'))\n",
    "    )\n",
    "    \n",
    "    print(\"Not Yet Recruiting button loaded successfully.\")\n",
    "    \n",
    "    # Highlight the \"Not Yet Recruiting\" button by adding a red border around it\n",
    "    driver.execute_script(\"arguments[0].style.border='3px solid red'\", not_yet_recruiting_button)\n",
    "    \n",
    "    # Pause to visually inspect the browser\n",
    "    time.sleep(5)  # Adjust this if needed for more time to inspect\n",
    "\n",
    "except Exception as e:\n",
    "    print(\"An error occurred:\", e)\n",
    "\n",
    "finally:\n",
    "    # Keep the browser open for visual confirmation\n",
    "    print(\"Script completed. Keeping the browser window open.\")\n",
    "    input(\"Press Enter to close the browser window...\")\n",
    "    driver.quit()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Not Yet Recruiting checkbox loaded successfully.\n",
      "Not Yet Recruiting checkbox is currently selected. Unselecting it...\n",
      "Not Yet Recruiting checkbox has been unselected.\n",
      "Script completed. Keeping the browser window open.\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "Interrupted by user",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[35], line 50\u001b[0m\n\u001b[1;32m     47\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m     48\u001b[0m     \u001b[38;5;66;03m# Keep the browser open for visual confirmation\u001b[39;00m\n\u001b[1;32m     49\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mScript completed. Keeping the browser window open.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 50\u001b[0m     \u001b[38;5;28;43minput\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPress Enter to close the browser window...\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m     51\u001b[0m     driver\u001b[38;5;241m.\u001b[39mquit()\n",
      "File \u001b[0;32m~/Desktop/patient-trials-matching/env/lib/python3.9/site-packages/ipykernel/kernelbase.py:1282\u001b[0m, in \u001b[0;36mKernel.raw_input\u001b[0;34m(self, prompt)\u001b[0m\n\u001b[1;32m   1280\u001b[0m     msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraw_input was called, but this frontend does not support input requests.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1281\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m StdinNotImplementedError(msg)\n\u001b[0;32m-> 1282\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_input_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1283\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1284\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_parent_ident\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mshell\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1285\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_parent\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mshell\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1286\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpassword\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   1287\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/Desktop/patient-trials-matching/env/lib/python3.9/site-packages/ipykernel/kernelbase.py:1325\u001b[0m, in \u001b[0;36mKernel._input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m   1322\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyboardInterrupt\u001b[39;00m:\n\u001b[1;32m   1323\u001b[0m     \u001b[38;5;66;03m# re-raise KeyboardInterrupt, to truncate traceback\u001b[39;00m\n\u001b[1;32m   1324\u001b[0m     msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInterrupted by user\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1325\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyboardInterrupt\u001b[39;00m(msg) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   1326\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m   1327\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlog\u001b[38;5;241m.\u001b[39mwarning(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid Message:\u001b[39m\u001b[38;5;124m\"\u001b[39m, exc_info\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: Interrupted by user"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Set up the driver\n",
    "driver = webdriver.Chrome()\n",
    "driver.set_window_size(1120, 1000)\n",
    "\n",
    "# Navigate to the page\n",
    "driver.get(\"https://clinicaltrials.gov/search?aggFilters=status:not%20rec\")\n",
    "\n",
    "try:\n",
    "    # Wait for the \"Not Yet Recruiting\" checkbox to be visible\n",
    "    not_yet_recruiting_checkbox = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"adv-check-status\"]/div[2]/div[1]'))\n",
    "    )\n",
    "    \n",
    "    print(\"Not Yet Recruiting checkbox loaded successfully.\")\n",
    "    \n",
    "    # Check if the checkbox is selected\n",
    "    checkbox_input = not_yet_recruiting_checkbox.find_element(By.TAG_NAME, 'input')\n",
    "    \n",
    "    if checkbox_input.is_selected():\n",
    "        print(\"Not Yet Recruiting checkbox is currently selected. Unselecting it...\")\n",
    "        \n",
    "        # Scroll into view\n",
    "        driver.execute_script(\"arguments[0].scrollIntoView(true);\", not_yet_recruiting_checkbox)\n",
    "        \n",
    "        # Use JavaScript to click the checkbox to unselect it\n",
    "        driver.execute_script(\"arguments[0].click();\", checkbox_input)\n",
    "\n",
    "        # Highlight it after unselecting\n",
    "        driver.execute_script(\"arguments[0].style.border='3px solid red'\", not_yet_recruiting_checkbox)\n",
    "\n",
    "        print(\"Not Yet Recruiting checkbox has been unselected.\")\n",
    "    else:\n",
    "        print(\"Not Yet Recruiting checkbox is already unselected.\")\n",
    "\n",
    "    # Pause to visually inspect the browser\n",
    "    time.sleep(5)  # Adjust this if needed for more time to inspect\n",
    "\n",
    "except Exception as e:\n",
    "    print(\"An error occurred:\", e)\n",
    "\n",
    "finally:\n",
    "    # Keep the browser open for visual confirmation\n",
    "    print(\"Script completed. Keeping the browser window open.\")\n",
    "    input(\"Press Enter to close the browser window...\")\n",
    "    driver.quit()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Eligibility Criteria container loaded successfully.\n",
      "Eligibility Criteria:\n",
      "Description\n",
      "Inclusion Criteria:\n",
      "Age ≥ 18 years at inclusion date\n",
      "Schirmer's test ≤ 5 mm for both eyes\n",
      "NIBUT ≤ 10 s for both eyes\n",
      "Previous positive result of blood test for sample anti-Ro (SS-A) or anti-La (SS-B), as indicated by medical record or lab result shown by the subject.\n",
      "The study subject reports having understood and have signed the Informed Consent Form (ICF) and is willing to comply with all investigation visits and assessments.\n",
      "Women of childbearing potential must agree to use a reliable, medically approved form of contraception during the study participation until end of study.\n",
      "Anticipated compliance with prescribed treatment and follow-up.\n",
      "Exclusion Criteria:\n",
      "Recently (12 months prior enrolment) undergone nasal, sinus, or ocular surgery.\n",
      "Presence of an ocular or respiratory condition that could affect the study parameters such as active ocular infection/inflammation, glaucoma, diabetic retinopathy, or upper respiratory tract infection per the Investigator's judgement.\n",
      "The study subject has a cognitive incapacity or language barrier precluding adequate understanding or cooperation.\n",
      "Any severe diseases interfering with the performance, evaluation, and outcome of the clinical evaluation.\n",
      "The study subject is considered by the Investigator to be unsuitable to participate in the investigation for any other reason.\n",
      "Previous (within 30 days prior to enrolment) and concurrent treatment with another investigational drug/s or device/s.\n",
      "Subject is pregnant or lactating or planning to get pregnant during the duration of the study.\n",
      "Show less\n",
      "Ages Eligible for Study\n",
      "18 Years and older (Adult,  Older Adult )\n",
      "Sexes Eligible for Study\n",
      "All\n",
      "Accepts Healthy Volunteers\n",
      "No\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Set up the Selenium WebDriver\n",
    "driver = webdriver.Chrome()\n",
    "\n",
    "# Navigate to the webpage\n",
    "driver.get('https://clinicaltrials.gov/study/NCT06626477#participation-criteria')\n",
    "\n",
    "# Wait for the \"Eligibility Criteria\" container to load\n",
    "try:\n",
    "    eligibility_criteria_container = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"participation-criteria\"]/ctg-participation-criteria/div[2]/div/div[2]'))\n",
    "    )\n",
    "    \n",
    "    print(\"Eligibility Criteria container loaded successfully.\")\n",
    "    \n",
    "    # Extract the text from the eligibility container\n",
    "    eligibility_text = eligibility_criteria_container.text\n",
    "    print(\"Eligibility Criteria:\")\n",
    "    print(eligibility_text)\n",
    "    \n",
    "    # Optionally save the data to a text file\n",
    "    with open('eligibility_criteria.txt', 'w') as file:\n",
    "        file.write(eligibility_text)\n",
    "    \n",
    "except Exception as e:\n",
    "    print(\"An error occurred:\", e)\n",
    "\n",
    "finally:\n",
    "    # Pause to visually inspect the browser\n",
    "    time.sleep(5)\n",
    "\n",
    "    # Close the browser\n",
    "    driver.quit()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Eligibility Criteria container loaded successfully.\n",
      "[]\n",
      "[]\n",
      "\n",
      "Data successfully written to eligibility_criteria_dynamic.txt\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Set up the Selenium WebDriver\n",
    "driver = webdriver.Chrome()\n",
    "\n",
    "# Navigate to the webpage\n",
    "driver.get('https://clinicaltrials.gov/study/NCT06626386?aggFilters=status:rec&rank=2#participation-criteria')\n",
    "\n",
    "# Wait for the Eligibility Criteria container to load\n",
    "try:\n",
    "    eligibility_container = WebDriverWait(driver, 20).until(\n",
    "        EC.visibility_of_element_located((By.XPATH, '//*[@id=\"participation-criteria\"]/ctg-participation-criteria/div[2]/div/div[2]'))\n",
    "    )\n",
    "    \n",
    "    print(\"Eligibility Criteria container loaded successfully.\")\n",
    "    \n",
    "    # Find and extract the headers (like \"Description\", \"Ages Eligible for Study\", etc.)\n",
    "    headers = driver.find_elements(By.XPATH, '//*[@id=\"participation-criteria\"]/ctg-participation-criteria/div[2]/div/div[2]//dt')\n",
    "    \n",
    "    # Find and extract the content corresponding to each header\n",
    "    contents = driver.find_elements(By.XPATH, '//*[@id=\"participation-criteria\"]/ctg-participation-criteria/div[2]/div/div[2]//dd')\n",
    "    \n",
    "    print(headers)\n",
    "    print(contents)\n",
    "    \n",
    "    # Create a dictionary to store criteria dynamically\n",
    "    criteria_data = {}\n",
    "    \n",
    "    # Loop over the headers and their respective contents\n",
    "    for header, content in zip(headers, contents):\n",
    "        header_text = header.text.strip()  # Get the header text\n",
    "        content_text = content.text.strip()  # Get the corresponding content text\n",
    "        \n",
    "        # Store in dictionary\n",
    "        criteria_data[header_text] = content_text\n",
    "\n",
    "    # Dynamically generate formatted criteria text\n",
    "    formatted_criteria = \"\"\n",
    "    for header, content in criteria_data.items():\n",
    "        formatted_criteria += f\"{header}: {content}\\n\\n\"  # Add each header-content pair to the formatted string\n",
    "\n",
    "    # Print the formatted output to verify\n",
    "    print(formatted_criteria)\n",
    "    \n",
    "    # Save the output to a text file\n",
    "    with open('eligibility_criteria_dynamic.txt', 'w') as file:\n",
    "        file.write(formatted_criteria)\n",
    "    \n",
    "    print(\"Data successfully written to eligibility_criteria_dynamic.txt\")\n",
    "    \n",
    "except Exception as e:\n",
    "    print(\"An error occurred:\", e)\n",
    "\n",
    "finally:\n",
    "    # Pause to visually inspect the browser\n",
    "    time.sleep(5)\n",
    "\n",
    "    # Close the browser\n",
    "    driver.quit()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inclusion/Exclusion Criteria container loaded successfully.\n",
      "Inclusion/Exclusion Criteria:\n",
      "Description\n",
      "Inclusion Criteria:\n",
      "Men aged from 45 to 70 years\n",
      "Informed consent for PSA measurement for prostate cancer screening\n",
      "Exclusion Criteria:\n",
      "Patients unable to give consent\n",
      "Show less\n",
      "Study Population\n",
      "Male patients between the ages of 40 and 70 who receive a PSA test as part of their routine examination\n",
      "Other Criteria container loaded successfully.\n",
      "Other Criteria:\n",
      "Ages Eligible for Study\n",
      "45 Years to 70 Years (Adult,  Older Adult )\n",
      "Sexes Eligible for Study\n",
      "Male\n",
      "Accepts Healthy Volunteers\n",
      "No\n",
      "Sampling Method\n",
      "Non-Probability Sample\n",
      "Data successfully written to clinical_trial_criteria.txt\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Initialize the WebDriver (Make sure to specify the correct path to your chromedriver)\n",
    "driver = webdriver.Chrome()\n",
    "\n",
    "# Open the clinical trials page\n",
    "driver.get(\"https://clinicaltrials.gov/study/NCT06626386?aggFilters=status:rec&rank=2#participation-criteria\")\n",
    "\n",
    "# Wait for the page to load completely\n",
    "time.sleep(5)\n",
    "\n",
    "try:\n",
    "    # Wait for the inclusion/exclusion criteria container to load\n",
    "    inclusion_exclusion_criteria = WebDriverWait(driver, 20).until(\n",
    "        EC.presence_of_element_located((By.XPATH, '//*[@id=\"participation-criteria\"]/ctg-participation-criteria/div[2]/div/div[2]/div[1]'))\n",
    "    )\n",
    "    print(\"Inclusion/Exclusion Criteria container loaded successfully.\")\n",
    "\n",
    "    # Scraping inclusion/exclusion criteria\n",
    "    inclusion_exclusion_text = inclusion_exclusion_criteria.text\n",
    "    print(\"Inclusion/Exclusion Criteria:\")\n",
    "    print(inclusion_exclusion_text)\n",
    "\n",
    "    # Wait for the other criteria container to load\n",
    "    other_criteria = WebDriverWait(driver, 20).until(\n",
    "        EC.presence_of_element_located((By.XPATH, '//*[@id=\"participation-criteria\"]/ctg-participation-criteria/div[2]/div/div[2]/div[2]'))\n",
    "    )\n",
    "    print(\"Other Criteria container loaded successfully.\")\n",
    "\n",
    "    # Scraping other criteria\n",
    "    other_criteria_text = other_criteria.text\n",
    "    print(\"Other Criteria:\")\n",
    "    print(other_criteria_text)\n",
    "\n",
    "    # Writing the scraped data to a text file\n",
    "    with open('clinical_trial_criteria.txt', 'w') as file:\n",
    "        file.write(\"Inclusion/Exclusion Criteria:\\n\")\n",
    "        file.write(inclusion_exclusion_text + \"\\n\\n\")\n",
    "        file.write(\"Other Criteria:\\n\")\n",
    "        file.write(other_criteria_text)\n",
    "\n",
    "    print(\"Data successfully written to clinical_trial_criteria.txt\")\n",
    "\n",
    "except Exception as e:\n",
    "    print(f\"An error occurred: {str(e)}\")\n",
    "\n",
    "finally:\n",
    "    # Optionally, keep the browser open to see the result\n",
    "    time.sleep(10)  # Adjust the sleep time as needed\n",
    "\n",
    "    # Close the browser\n",
    "    driver.quit()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inclusion Criteria element is now in view.\n",
      "Inclusion Criteria:\n",
      "Men aged from 45 to 70 years\n",
      "Informed consent for PSA measurement for prostate cancer screening\n",
      "Exclusion Criteria element is now in view.\n",
      "Exclusion Criteria:\n",
      "Patients unable to give consent\n",
      "Other Criteria container loaded successfully.\n",
      "Other Criteria:\n",
      "Ages Eligible for Study\n",
      "45 Years to 70 Years (Adult,  Older Adult )\n",
      "Sexes Eligible for Study\n",
      "Male\n",
      "Accepts Healthy Volunteers\n",
      "No\n",
      "Sampling Method\n",
      "Non-Probability Sample\n",
      "Data successfully written to clinical_trial_criteria.txt\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "import time\n",
    "\n",
    "# Initialize the WebDriver (Make sure to specify the correct path to your chromedriver)\n",
    "driver = webdriver.Chrome()\n",
    "\n",
    "# Open the clinical trials page\n",
    "driver.get(\"https://clinicaltrials.gov/study/NCT06626386?aggFilters=status:rec&rank=2#participation-criteria\")\n",
    "\n",
    "# Wait for the page to load completely\n",
    "time.sleep(5)\n",
    "\n",
    "try:\n",
    "    # Scroll the page to bring inclusion criteria into view\n",
    "    inclusion_element = WebDriverWait(driver, 20).until(\n",
    "        EC.presence_of_element_located((By.XPATH, '//*[@id=\"eligibility-criteria-description\"]/div/div/ul[1]')) #//*[@id=\"eligibility-criteria-description\"]/div/div/ul[1]\n",
    "    )\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView(true);\", inclusion_element)\n",
    "    print(\"Inclusion Criteria element is now in view.\")\n",
    "\n",
    "    # Scraping inclusion criteria\n",
    "    inclusion_criteria_text = inclusion_element.text\n",
    "    print(\"Inclusion Criteria:\")\n",
    "    print(inclusion_criteria_text)\n",
    "\n",
    "    # Scroll to exclusion criteria\n",
    "    exclusion_element = WebDriverWait(driver, 20).until(\n",
    "        EC.presence_of_element_located((By.XPATH, '//*[@id=\"eligibility-criteria-description\"]/div/div/ul[2]'))\n",
    "    )\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView(true);\", exclusion_element)\n",
    "    print(\"Exclusion Criteria element is now in view.\")\n",
    "\n",
    "    # Scraping exclusion criteria\n",
    "    exclusion_criteria_text = exclusion_element.text\n",
    "    print(\"Exclusion Criteria:\")\n",
    "    print(exclusion_criteria_text)\n",
    "    \n",
    "    other_criteria = WebDriverWait(driver, 20).until(\n",
    "        EC.presence_of_element_located((By.XPATH, '//*[@id=\"participation-criteria\"]/ctg-participation-criteria/div[2]/div/div[2]/div[2]'))\n",
    "    )\n",
    "    print(\"Other Criteria container loaded successfully.\")\n",
    "    \n",
    "    other_criteria_text = other_criteria.text\n",
    "    print(\"Other Criteria:\")\n",
    "    print(other_criteria_text)\n",
    "\n",
    "    # Writing the scraped data to a text file\n",
    "    with open('clinical_trial_criteria.txt', 'w') as file:\n",
    "        file.write(\"Inclusion Criteria:\\n\")\n",
    "        file.write(inclusion_criteria_text + \"\\n\\n\")\n",
    "        file.write(\"Exclusion Criteria:\\n\")\n",
    "        file.write(exclusion_criteria_text + \"\\n\\n\")\n",
    "        file.write(\"Other Criteria:\\n\")\n",
    "        file.write(other_criteria_text)\n",
    "\n",
    "    print(\"Data successfully written to clinical_trial_criteria.txt\")\n",
    "\n",
    "except Exception as e:\n",
    "    print(f\"An error occurred: {str(e)}\")\n",
    "\n",
    "finally:\n",
    "    # Optionally, keep the browser open to see the result\n",
    "    time.sleep(5)  # Adjust the sleep time as needed\n",
    "\n",
    "    # Close the browser\n",
    "    driver.quit()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Scraping Environment",
   "language": "python",
   "name": "scraping_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}