--- a +++ b/Data_loading_in_MySQL.ipynb @@ -0,0 +1,144 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "id": "e71d86f0-1736-48e2-a77e-f1484c224eef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All existing data deleted from medications table.\n", + "1000 rows inserted.\n", + "2000 rows inserted.\n", + "3000 rows inserted.\n", + "4000 rows inserted.\n", + "5000 rows inserted.\n", + "6000 rows inserted.\n", + "7000 rows inserted.\n", + "8000 rows inserted.\n", + "9000 rows inserted.\n", + "10000 rows inserted.\n", + "All data inserted. Total rows: 10000\n", + "Data reload completed successfully.\n", + "MySQL connection is closed\n" + ] + } + ], + "source": [ + "import mysql.connector\n", + "import csv\n", + "import sys\n", + "from datetime import datetime\n", + "\n", + "# Database connection details\n", + "db_config = {\n", + " 'host': 'localhost',\n", + " 'user': 'root',\n", + " 'password': 'HunnyS@1511',\n", + " 'database': 'patient_readmission'\n", + "}\n", + "\n", + "# File path\n", + "file_path = r\"C:\\Users\\Hp\\Desktop\\IITK\\patient readmission project\\medications2.csv\"\n", + "\n", + "def convert_date(date_string):\n", + " try:\n", + " return datetime.strptime(date_string, '%d-%m-%Y').strftime('%Y-%m-%d')\n", + " except ValueError:\n", + " return None\n", + "\n", + "def delete_and_load_data(cursor, conn):\n", + " try:\n", + " # Delete all data from medications table\n", + " cursor.execute(\"DELETE FROM medications\")\n", + " print(\"All existing data deleted from medications table.\")\n", + "\n", + " # Read and insert data from CSV\n", + " with open(file_path, 'r') as file:\n", + " csv_reader = csv.reader(file)\n", + " headers = next(csv_reader) # Get column names\n", + " \n", + " # Prepare the INSERT query\n", + " columns = ', '.join(headers)\n", + " placeholders = ', '.join(['%s'] * len(headers))\n", + " query = f\"INSERT INTO medications ({columns}) VALUES ({placeholders})\"\n", + " \n", + " # Insert data row by row\n", + " for i, row in enumerate(csv_reader, start=1):\n", + " try:\n", + " # Convert dates\n", + " row[3] = convert_date(row[3]) # start_date\n", + " row[4] = convert_date(row[4]) # end_date\n", + " \n", + " cursor.execute(query, row)\n", + " if i % 1000 == 0: # Commit every 1000 rows\n", + " conn.commit()\n", + " print(f\"{i} rows inserted.\")\n", + " except mysql.connector.Error as err:\n", + " print(f\"Error at row {i}: {err}\")\n", + " print(f\"Problematic row: {row}\")\n", + " conn.rollback()\n", + " \n", + " # Final commit\n", + " conn.commit()\n", + " print(f\"All data inserted. Total rows: {i}\")\n", + "\n", + " except mysql.connector.Error as err:\n", + " print(f\"Error: {err}\")\n", + " conn.rollback()\n", + "\n", + "def main():\n", + " try:\n", + " conn = mysql.connector.connect(**db_config)\n", + " cursor = conn.cursor()\n", + "\n", + " delete_and_load_data(cursor, conn)\n", + "\n", + " print(\"Data reload completed successfully.\")\n", + "\n", + " except mysql.connector.Error as err:\n", + " print(f\"Database connection error: {err}\")\n", + " finally:\n", + " if 'conn' in locals() and conn.is_connected():\n", + " cursor.close()\n", + " conn.close()\n", + " print(\"MySQL connection is closed\")\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99c6729e-7692-4d82-8d7d-250cdd75baf9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}