scripts/scrape_drugs_data.py

## This script scrapes the drugs.com website for the given drug name and saves the extracted text to a file.

import requests
from bs4 import BeautifulSoup
import html2text
from multiprocessing import Pool
import os
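
# The third-party imports above come from PyPI:
#   pip install requests beautifulsoup4 html2text
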
def extract_section_by_id(soup, section_id):
    """
    Extracts the section with the given ID from the HTML content.

    Args:
        soup (BeautifulSoup): BeautifulSoup object containing the HTML content
        section_id (str): ID of the section to be extracted

    Returns:
        str: HTML content of the section
    """
    # Find the section with the given ID
    section = soup.find('h2', id=section_id)

    # If the section is not found, return an error message
    if not section:
        return f"Section with ID '{section_id}' not found."

    # Extract the content of the section
    content = []
    for sibling in section.next_siblings:
        # Stop at the next section heading
        if sibling.name == 'h2':
            break
        # Keep the markup if the sibling is a paragraph, list, or div
        if sibling.name in ['p', 'ul', 'ol', 'div']:
            content.append(str(sibling))

    return ' '.join(content)

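# A minimal sketch of what extract_section_by_id returns, using illustrative
# HTML (the real markup on drugs.com may differ):
#
#   demo = BeautifulSoup(
#       '<h2 id="uses">Uses</h2><p>Treats X.</p><h2 id="dosage">Dosage</h2>',
#       'html.parser')
#   extract_section_by_id(demo, "uses")  # -> '<p>Treats X.</p>'
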
def scrape_website(args):
    """
    Scrapes the drugs.com website for the given drug name and saves the extracted text to a file.

    Args:
        args (tuple): Tuple containing the URL and drug name

    Returns:
        None
    """
    # Unpack the arguments
    url, drug_name = args
    print(f"[INFO] Processing: {drug_name}")

    # Fetch the HTML content
    try:
        # A timeout keeps a stalled request from hanging the worker indefinitely
        response = requests.get(url, timeout=30)
        # If the request fails, log an error and skip this drug
        if response.status_code != 200:
            print(f"[ERROR] Failed to fetch {url} with status code {response.status_code}")
            return
        # Parse the HTML content
        soup = BeautifulSoup(response.content, 'html.parser')
    except Exception as e:
        print(f"[ERROR] Exception while fetching {url}: {e}")
        return

    # Initialize html2text to convert the extracted HTML into plain text
    h = html2text.HTML2Text()
    h.ignore_links = True
    h.ignore_images = True
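    # For reference, h.handle converts an HTML fragment to Markdown-style
    # plain text, e.g. h.handle("<p>Take <b>one</b> daily.</p>") yields
    # roughly "Take **one** daily.\n\n" (an illustrative example, not output
    # captured from drugs.com).
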
    # Section IDs to extract
    section_ids = ["uses", "side-effects"]

    # Extract each section and convert its HTML to text
    file_content = ''
    for section_id in section_ids:
        section_html = extract_section_by_id(soup, section_id)
        section_text = h.handle(section_html)
        file_content += section_text + "\n\n"

    # Write the extracted text to a file
    file_name = f"data/raw_drug_info/{drug_name}.txt"
    with open(file_name, 'w', encoding='utf-8') as file:
        file.write(file_content)

    print(f"[INFO] Processed {drug_name} successfully")

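# A single-drug invocation looks like this (a sketch; it assumes network
# access and that data/raw_drug_info/ already exists, which __main__ below
# ensures):
#
#   scrape_website(("https://www.drugs.com/abilify.html", "abilify"))
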
if __name__ == "__main__":
    ## List of drugs to be scraped from drugs.com
    DRUG_LIST = ["abilify", "infliximab", "rituximab", "etanercept",
                 "Humira", "Enbrel", "Remicade", "Rituxan",
                 "Nexium", "Prevacid", "Prilosec", "Protonix",
                 "Crestor", "Lipitor", "Zocor", "Vytorin",
                 "Victoza", "Byetta", "Januvia", "Onglyza",
                 "Advair", "Symbicort", "Spiriva", "Singulair",
                 "Cialis", "Viagra", "Levitra", "Staxyn",
                 "AndroGel", "Prezista", "Doxycycline", "Cymbalta",
                 "Neupogen", "Epogen", "Aranesp", "Neulasta",
                 "Lunesta", "Ambien", "Provigil", "Nuvigil",
                 "Metoprolol", "Lisinopril", "Amlodipine", "Atorvastatin",
                 "Zoloft", "Lexapro", "Prozac", "Celexa",
                 "Complera", "Atripla"]

    # Make sure the output directory exists before the workers write to it
    if not os.path.exists("data/raw_drug_info/"):
        os.makedirs("data/raw_drug_info/")

    # Prepare a list of (url, drug_name) tuples for the Pool; drugs.com page
    # slugs are lowercase, so normalize mixed-case names like "Humira"
    tasks = [(f"https://www.drugs.com/{drug.lower()}.html", drug) for drug in DRUG_LIST]

    # Number of processes
    num_processes = 5  # Adjust this based on your machine's capability

    # Create a Pool of processes and map the function to the tasks
    with Pool(num_processes) as pool:
        pool.map(scrape_website, tasks)
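
    # Design note: the work here is I/O-bound (network requests), so a thread
    # pool such as concurrent.futures.ThreadPoolExecutor would serve equally
    # well; the GIL is released while waiting on network I/O.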