Diff of /NESTCOMPETIITION.ipynb [000000] .. [8409ad]

Switch to side-by-side view

--- a
+++ b/NESTCOMPETIITION.ipynb
@@ -0,0 +1,3322 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU",
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "e41925970dc94c2aa90b4da8acac85cf": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_c1f0f99a4a4341d18e0039a71b305111",
+              "IPY_MODEL_d54d01c91e694b3eb180170cddd1211c",
+              "IPY_MODEL_3c707baf63cf4cbf9732e39ddeb84c7c"
+            ],
+            "layout": "IPY_MODEL_08955c157996426eaa089d878b8d820f"
+          }
+        },
+        "c1f0f99a4a4341d18e0039a71b305111": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_da9c8ff09bed4c6989b0246c7bdc8fd7",
+            "placeholder": "​",
+            "style": "IPY_MODEL_b78266bbcc0e4ed0bcaab9009b30d98e",
+            "value": "tokenizer_config.json: 100%"
+          }
+        },
+        "d54d01c91e694b3eb180170cddd1211c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2fd8c28ebf1b47f78e2e0093608407cf",
+            "max": 28,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_8c728d06a0814cc8b83931b24f0115d9",
+            "value": 28
+          }
+        },
+        "3c707baf63cf4cbf9732e39ddeb84c7c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_da4b0713ce9f47208a39ec89515c30e4",
+            "placeholder": "​",
+            "style": "IPY_MODEL_b2fd795e6a534319a87213f05e824315",
+            "value": " 28.0/28.0 [00:00<00:00, 1.45kB/s]"
+          }
+        },
+        "08955c157996426eaa089d878b8d820f": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "da9c8ff09bed4c6989b0246c7bdc8fd7": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "b78266bbcc0e4ed0bcaab9009b30d98e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "2fd8c28ebf1b47f78e2e0093608407cf": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8c728d06a0814cc8b83931b24f0115d9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "da4b0713ce9f47208a39ec89515c30e4": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "b2fd795e6a534319a87213f05e824315": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "0722a8ceb94345a79e5bdc3b84f71d7f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_7c04cff87d1041b1878488f5014f9b7c",
+              "IPY_MODEL_0ef4539514a1462caacf58e5c3fa516b",
+              "IPY_MODEL_4c61a2b37c4c4ffbb3a10d270140724f"
+            ],
+            "layout": "IPY_MODEL_d1288d4268e74bd384064c4f2cfbfa7b"
+          }
+        },
+        "7c04cff87d1041b1878488f5014f9b7c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_1c84ce5cb56a4c7286c0f5981b8fbd5a",
+            "placeholder": "​",
+            "style": "IPY_MODEL_61a01f78b9714e4aa0d869d8e6563e81",
+            "value": "config.json: 100%"
+          }
+        },
+        "0ef4539514a1462caacf58e5c3fa516b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_180753d93be944e98c47bba836397ba0",
+            "max": 385,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_db137743dfc24b6b84c6c15bcd5b5766",
+            "value": 385
+          }
+        },
+        "4c61a2b37c4c4ffbb3a10d270140724f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_16f88feb88fc43b5b381521d91b02150",
+            "placeholder": "​",
+            "style": "IPY_MODEL_9b2a4006744a4940aa64fd9e41d62d4e",
+            "value": " 385/385 [00:00<00:00, 21.0kB/s]"
+          }
+        },
+        "d1288d4268e74bd384064c4f2cfbfa7b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "1c84ce5cb56a4c7286c0f5981b8fbd5a": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "61a01f78b9714e4aa0d869d8e6563e81": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "180753d93be944e98c47bba836397ba0": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "db137743dfc24b6b84c6c15bcd5b5766": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "16f88feb88fc43b5b381521d91b02150": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "9b2a4006744a4940aa64fd9e41d62d4e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "d55048b2772b47eab27637e9aa42d4dc": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_a70574b040b14832abb25d2eb040fb0a",
+              "IPY_MODEL_e5c3bf5ade9a405094a1673bc2beb795",
+              "IPY_MODEL_3d7c0e5ba179442eb417435ff72ae85a"
+            ],
+            "layout": "IPY_MODEL_b8aea249ccf04a619c1e926e7b6b6bbe"
+          }
+        },
+        "a70574b040b14832abb25d2eb040fb0a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_73554830b9524f1b8b8714ff33c3b8e8",
+            "placeholder": "​",
+            "style": "IPY_MODEL_15bf509be6d946e187e14fd6854950a9",
+            "value": "vocab.txt: 100%"
+          }
+        },
+        "e5c3bf5ade9a405094a1673bc2beb795": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_65855073adcd4434b8e9af65ecc59915",
+            "max": 226150,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_f3485f93a7374df49cc200d18e9ac49e",
+            "value": 226150
+          }
+        },
+        "3d7c0e5ba179442eb417435ff72ae85a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_53d6422031bf48dab34a3b47e237da93",
+            "placeholder": "​",
+            "style": "IPY_MODEL_1ebb572142e8490199aa8ffce39df341",
+            "value": " 226k/226k [00:00<00:00, 3.21MB/s]"
+          }
+        },
+        "b8aea249ccf04a619c1e926e7b6b6bbe": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "73554830b9524f1b8b8714ff33c3b8e8": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "15bf509be6d946e187e14fd6854950a9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "65855073adcd4434b8e9af65ecc59915": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f3485f93a7374df49cc200d18e9ac49e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "53d6422031bf48dab34a3b47e237da93": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "1ebb572142e8490199aa8ffce39df341": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "efe460065f0448acbf75200543187bb0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_0e6e8bc125874564bb46431ac85524dc",
+              "IPY_MODEL_30d5690beb7341dc8b37de5a1e0812ab",
+              "IPY_MODEL_e8c76754ea784709bae0c59843c61279"
+            ],
+            "layout": "IPY_MODEL_67690c975154491b96ed9091db152a0f"
+          }
+        },
+        "0e6e8bc125874564bb46431ac85524dc": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_74bbdf59962d41059f018e6eab297968",
+            "placeholder": "​",
+            "style": "IPY_MODEL_2c20eb99653d4026ace81ccbe858b479",
+            "value": "pytorch_model.bin: 100%"
+          }
+        },
+        "30d5690beb7341dc8b37de5a1e0812ab": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_183aee869ce34eaca26f1cf533559caa",
+            "max": 440474434,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_d4622e6a3301483c91a66836d5724e0e",
+            "value": 440474434
+          }
+        },
+        "e8c76754ea784709bae0c59843c61279": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_d9c5d0256f534e4cbd82797e1e9f3ccd",
+            "placeholder": "​",
+            "style": "IPY_MODEL_0dc7017260764c9dbdcb9167e7a40b15",
+            "value": " 440M/440M [00:04<00:00, 108MB/s]"
+          }
+        },
+        "67690c975154491b96ed9091db152a0f": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "74bbdf59962d41059f018e6eab297968": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "2c20eb99653d4026ace81ccbe858b479": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "183aee869ce34eaca26f1cf533559caa": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d4622e6a3301483c91a66836d5724e0e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "d9c5d0256f534e4cbd82797e1e9f3ccd": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0dc7017260764c9dbdcb9167e7a40b15": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "yBhXMsR3OE85",
+        "outputId": "f586f5e1-50a3-446a-8258-0dde1c57e636"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-1-8174d4ba0823>:8: DtypeWarning: Columns (0,18,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,542,543,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,765,767,768,769,771,772,773,775,776) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+            "  df = pd.read_csv(\"/content/usecase_2_.csv\", quoting=csv.QUOTE_MINIMAL, escapechar='\\\\', on_bad_lines='skip')\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Successfully read the CSV with skipped lines.\n",
+            "Check the problematic rows (if any) by inspecting the original CSV file around row 785.\n"
+          ]
+        }
+      ],
+      "source": [
+        "import pandas as pd\n",
+        "import csv\n",
+        "\n",
+        "# Use csv.QUOTE_MINIMAL to only quote where necessary\n",
+        "# Try to read the file with error_bad_lines=False to skip problematic lines\n",
+        "# and see if you can identify the issue in the skipped rows\n",
+        "try:\n",
+        "    df = pd.read_csv(\"/content/usecase_2_.csv\", quoting=csv.QUOTE_MINIMAL, escapechar='\\\\', on_bad_lines='skip')\n",
+        "    print(\"Successfully read the CSV with skipped lines.\")\n",
+        "    print(\"Check the problematic rows (if any) by inspecting the original CSV file around row 785.\")\n",
+        "except pd.errors.ParserError as e:\n",
+        "    print(f\"Error: {e}\")\n",
+        "    print(\"The on_bad_lines='skip' approach also failed. This likely indicates a more severe issue within the CSV file structure.\")\n",
+        "    print(\"Possible solutions:\")\n",
+        "    print(\"  1. Manually inspect row 785 and the surrounding rows in your CSV file for unescaped quotes or incorrect line endings.\")\n",
+        "    print(\"  2. If you have control over the CSV generation process, ensure proper escaping of quotes and consistent line endings.\")\n",
+        "    print(\"  3. Try using a different text editor to open the CSV file and check for any hidden characters or encoding issues.\")\n",
+        "    print(\"  4. If the file is large, consider processing it in smaller chunks to identify the specific area causing the problem.\")\n",
+        "# escapechar is used to escape special characters (like quotes) within the fields.\n",
+        "# Using backslash here is a common choice."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df = df.loc[:, ~df.columns.str.contains('^Unnamed')]"
+      ],
+      "metadata": {
+        "id": "ONTOsvE2P2hT"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(df.head())"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "0ICHZF9HP4vp",
+        "outputId": "4fdf71c5-22f2-44c7-b559-8f40b5d8b75b"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "    NCT Number                                        Study Title  \\\n",
+            "0  NCT04841499  Effects of a Seven-day BASIS™ Supplementation ...   \n",
+            "1  NCT03020641          Peritoneal Damage in Laparoscopic Surgery   \n",
+            "2  NCT03727620  Doxycycline in the Treatment of Aggressive Per...   \n",
+            "3  NCT03162926  A Safety and Tolerability Study of VC-02™ Comb...   \n",
+            "4  NCT04434313  Treatment of Hemiparetic Gait Impairments Usin...   \n",
+            "\n",
+            "                                      Study URL Acronym Study Status  \\\n",
+            "0  https://clinicaltrials.gov/study/NCT04841499     NaN    COMPLETED   \n",
+            "1  https://clinicaltrials.gov/study/NCT03020641     NaN    COMPLETED   \n",
+            "2  https://clinicaltrials.gov/study/NCT03727620     NaN    COMPLETED   \n",
+            "3  https://clinicaltrials.gov/study/NCT03162926     NaN    COMPLETED   \n",
+            "4  https://clinicaltrials.gov/study/NCT04434313     NaN    COMPLETED   \n",
+            "\n",
+            "                                       Brief Summary Study Results  \\\n",
+            "0  The purpose of this study is to determine whet...            NO   \n",
+            "1  The investigators hypothesized that applying a...           YES   \n",
+            "2  The aim of the study was to compare the clinic...            NO   \n",
+            "3  The purpose of this trial is to test if VC-02™...            NO   \n",
+            "4  The objective of this research is to investiga...            NO   \n",
+            "\n",
+            "                                          Conditions  \\\n",
+            "0                                          Menopause   \n",
+            "1                                  Peritoneal Damage   \n",
+            "2                           Aggressive Periodontitis   \n",
+            "3                           Type 1 Diabetes Mellitus   \n",
+            "4  Telemedicine|Gait, Hemiplegic|Gait Disorders, ...   \n",
+            "\n",
+            "                                       Interventions  \\\n",
+            "0  DRUG: BASIS™ (Crystalline Nicotinamide Ribosid...   \n",
+            "1  PROCEDURE: Low pressure pneumoperitoneum|PROCE...   \n",
+            "2  DRUG: amoxicillin plus metronidazole|DRUG: Dox...   \n",
+            "3  COMBINATION_PRODUCT: VC-02 Combination Product...   \n",
+            "4  DEVICE: Delivery of iStride™ device gait treat...   \n",
+            "\n",
+            "                            Primary Outcome Measures  ...                 Age  \\\n",
+            "0  Production of Estradiol, To determine whether ...  ...  ADULT, OLDER_ADULT   \n",
+            "1  Inflammatory Peritoneal Markers, logaritmic le...  ...  ADULT, OLDER_ADULT   \n",
+            "2  Decrease of periodontal pockets ≥ 4mm, • Probi...  ...        CHILD, ADULT   \n",
+            "3  Incidence of all adverse events reported for s...  ...  ADULT, OLDER_ADULT   \n",
+            "4  Feasibility of safely implementing the treatme...  ...  ADULT, OLDER_ADULT   \n",
+            "\n",
+            "          Phases Enrollment Funder Type      Study Type  \\\n",
+            "0            NaN         40       OTHER  INTERVENTIONAL   \n",
+            "1            NaN        100       OTHER  INTERVENTIONAL   \n",
+            "2  PHASE1|PHASE2         24       OTHER  INTERVENTIONAL   \n",
+            "3         PHASE1          3    INDUSTRY  INTERVENTIONAL   \n",
+            "4            NaN          6    INDUSTRY  INTERVENTIONAL   \n",
+            "\n",
+            "                                        Study Design            Other IDs  \\\n",
+            "0  Allocation: NA|Intervention Model: SINGLE_GROU...           USAH-EH301   \n",
+            "1  Allocation: RANDOMIZED|Intervention Model: PAR...          A-CGyD-2017   \n",
+            "2  Allocation: NON_RANDOMIZED|Intervention Model:...            DOXYAPG18   \n",
+            "3  Allocation: NA|Intervention Model: SINGLE_GROU...             VC02-102   \n",
+            "4  Allocation: NA|Intervention Model: SINGLE_GROU...  MOT-TELE-2020-04-00   \n",
+            "\n",
+            "                                           Locations  \\\n",
+            "0  University of South Alabama, Mobile, Alabama, ...   \n",
+            "1       Ramon y Cajal Hospital, Madrid, 28034, Spain   \n",
+            "2                  BENRACHADI Latifa, Rabat, Morocco   \n",
+            "3   University of Alberta, Edmonton, Alberta, Canada   \n",
+            "4  Moterum Technologies, Inc. (study location: ho...   \n",
+            "\n",
+            "                                     Study Documents Time taken for Enrollment  \n",
+            "0                                                NaN                         3  \n",
+            "1  Study Protocol and Statistical Analysis Plan, ...                        27  \n",
+            "2                                                NaN                         5  \n",
+            "3                                                NaN                         7  \n",
+            "4                                                NaN                         8  \n",
+            "\n",
+            "[5 rows x 25 columns]\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "df = df.drop(df.columns[0], axis=1)"
+      ],
+      "metadata": {
+        "id": "k-Mb2MQbP60s"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(df.dtypes)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "FKeOAM3tP9-M",
+        "outputId": "eca2d10b-a590-4ecf-e8a6-17816e8eace6"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Study Title                   object\n",
+            "Study URL                     object\n",
+            "Acronym                       object\n",
+            "Study Status                  object\n",
+            "Brief Summary                 object\n",
+            "Study Results                 object\n",
+            "Conditions                    object\n",
+            "Interventions                 object\n",
+            "Primary Outcome Measures      object\n",
+            "Secondary Outcome Measures    object\n",
+            "Other Outcome Measures        object\n",
+            "Sponsor                       object\n",
+            "Collaborators                 object\n",
+            "Sex                           object\n",
+            "Age                           object\n",
+            "Phases                        object\n",
+            "Enrollment                    object\n",
+            "Funder Type                   object\n",
+            "Study Type                    object\n",
+            "Study Design                  object\n",
+            "Other IDs                     object\n",
+            "Locations                     object\n",
+            "Study Documents               object\n",
+            "Time taken for Enrollment     object\n",
+            "dtype: object\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import re\n",
+        "from nltk.corpus import stopwords\n",
+        "from nltk.tokenize import word_tokenize\n",
+        "import nltk\n",
+        "\n",
+        "# Download necessary NLTK data\n",
+        "nltk.download('stopwords')\n",
+        "nltk.download('punkt')\n",
+        "nltk.download('punkt_tab') # Download punkt_tab data\n",
+        "\n",
+        "# Define the columns to process\n",
+        "text_columns = [\n",
+        "    'Study Title', 'Study Status', 'Brief Summary', 'Study Results',\n",
+        "    'Conditions', 'Interventions', 'Primary Outcome Measures',\n",
+        "    'Funder Type', 'Study Type', 'Study Design', 'Locations'\n",
+        "]\n",
+        "\n",
+        "# Define a function to preprocess text (remove prepositions and stopwords)\n",
+        "def preprocess_text(text):\n",
+        "    if pd.isnull(text):\n",
+        "        return text  # Skip processing for NaN values\n",
+        "    # Tokenize and remove punctuation\n",
+        "    words = word_tokenize(re.sub(r'[^\\w\\s]', '', text.lower()))\n",
+        "    # Remove stopwords (including prepositions)\n",
+        "    filtered_words = [word for word in words if word not in stopwords.words('english')]\n",
+        "    return ' '.join(filtered_words)\n",
+        "\n",
+        "# Apply preprocessing to specified columns\n",
+        "for column in text_columns:\n",
+        "    if column in df.columns:\n",
+        "        df[column] = df[column].apply(preprocess_text)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "EK48M-dFQBJW",
+        "outputId": "7f1601b1-da77-474b-efc1-9ea5b28607dc"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping corpora/stopwords.zip.\n",
+            "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping tokenizers/punkt.zip.\n",
+            "[nltk_data] Downloading package punkt_tab to /root/nltk_data...\n",
+            "[nltk_data]   Unzipping tokenizers/punkt_tab.zip.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(df.head)"
+      ],
+      "metadata": {
+        "id": "oizcbyiPQE-Q",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "52cd4dd2-688f-42f9-d44a-41c064365da0"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "<bound method NDFrame.head of                                             Study Title  \\\n",
+            "0     effects sevenday basis supplementation menopau...   \n",
+            "1                peritoneal damage laparoscopic surgery   \n",
+            "2        doxycycline treatment aggressive periodontitis   \n",
+            "3     safety tolerability study vc02 combination pro...   \n",
+            "4     treatment hemiparetic gait impairments using t...   \n",
+            "...                                                 ...   \n",
+            "3098  comparative effectiveness costeffectiveness ch...   \n",
+            "3099  impact intestinal microbiota treatment ceftria...   \n",
+            "3100                               mental balance study   \n",
+            "3101       glymphatic kinetics healthy adult volunteers   \n",
+            "3102  utilizing mychart assess effectiveness interve...   \n",
+            "\n",
+            "                                         Study URL    Acronym Study Status  \\\n",
+            "0     https://clinicaltrials.gov/study/NCT04841499        NaN    completed   \n",
+            "1     https://clinicaltrials.gov/study/NCT03020641        NaN    completed   \n",
+            "2     https://clinicaltrials.gov/study/NCT03727620        NaN    completed   \n",
+            "3     https://clinicaltrials.gov/study/NCT03162926        NaN    completed   \n",
+            "4     https://clinicaltrials.gov/study/NCT04434313        NaN    completed   \n",
+            "...                                            ...        ...          ...   \n",
+            "3098  https://clinicaltrials.gov/study/NCT03294785        NaN    completed   \n",
+            "3099  https://clinicaltrials.gov/study/NCT03179384  CEFIMPACT    completed   \n",
+            "3100  https://clinicaltrials.gov/study/NCT05757050        NaN    completed   \n",
+            "3101  https://clinicaltrials.gov/study/NCT03218111        NaN    completed   \n",
+            "3102  https://clinicaltrials.gov/study/NCT05222464        NaN    completed   \n",
+            "\n",
+            "                                          Brief Summary Study Results  \\\n",
+            "0     purpose study determine whether short suppleme...                 \n",
+            "1     investigators hypothesized applying low intrap...           yes   \n",
+            "2     aim study compare clinical effects systemic us...                 \n",
+            "3     purpose trial test vc02 combination product im...                 \n",
+            "4     objective research investigate feasibility del...                 \n",
+            "...                                                 ...           ...   \n",
+            "3098  multicenter randomized controlled trial assess...                 \n",
+            "3099  acute pyelonephritis apn corresponds infection...                 \n",
+            "3100  proposed design randomised doubleblind control...                 \n",
+            "3101  study done order see gadoliniumbased mri contr...                 \n",
+            "3102  vasomotor symptoms vms common consequence syst...                 \n",
+            "\n",
+            "                                             Conditions  \\\n",
+            "0                                             menopause   \n",
+            "1                                     peritoneal damage   \n",
+            "2                              aggressive periodontitis   \n",
+            "3                              type 1 diabetes mellitus   \n",
+            "4     telemedicinegait hemiplegicgait disorders neur...   \n",
+            "...                                                 ...   \n",
+            "3098                                  chronic neck pain   \n",
+            "3099                               pyelonephritis acute   \n",
+            "3100        mental health wellness 1work related stress   \n",
+            "3101                                            healthy   \n",
+            "3102                                      breast cancer   \n",
+            "\n",
+            "                                          Interventions  \\\n",
+            "0     drug basis crystalline nicotinamide riboside 2...   \n",
+            "1     procedure low pressure pneumoperitoneumprocedu...   \n",
+            "2     drug amoxicillin plus metronidazoledrug doxycy...   \n",
+            "3     combination_product vc02 combination product a...   \n",
+            "4     device delivery istride device gait treatment ...   \n",
+            "...                                                 ...   \n",
+            "3098  procedure chuna manual therapydrug conventiona...   \n",
+            "3099                                   drug ceftriaxone   \n",
+            "3100  dietary_supplement refocus verum tabletsdietar...   \n",
+            "3101                         mr imagingother ctguidance   \n",
+            "3102                           standard care treatments   \n",
+            "\n",
+            "                               Primary Outcome Measures  \\\n",
+            "0     production estradiol determine whether short s...   \n",
+            "1     inflammatory peritoneal markers logaritmic lev...   \n",
+            "2     decrease periodontal pockets 4mm probing pocke...   \n",
+            "3     incidence adverse events reported subjects thr...   \n",
+            "4     feasibility safely implementing treatment prot...   \n",
+            "...                                                 ...   \n",
+            "3098  difference visual analogue scale vas neck pain...   \n",
+            "3099  emergence ceftriaxoneresistant enterobacteriac...   \n",
+            "3100  change cognitive function cognitive domain fac...   \n",
+            "3101  drug distribution time drug distribution gadol...   \n",
+            "3102  patient engagement mychart accessibility user ...   \n",
+            "\n",
+            "                             Secondary Outcome Measures  ...  \\\n",
+            "0                                                   NaN  ...   \n",
+            "1                                                   NaN  ...   \n",
+            "2     Plaque index decrease, Plaque index was assess...  ...   \n",
+            "3                                                   NaN  ...   \n",
+            "4     Feasibility of screening criteria, To enroll p...  ...   \n",
+            "...                                                 ...  ...   \n",
+            "3098  Difference between visual analogue scale (VAS)...  ...   \n",
+            "3099                                                NaN  ...   \n",
+            "3100  Profile of Mood States (POMS), 35-item measure...  ...   \n",
+            "3101                                                NaN  ...   \n",
+            "3102  Hot Flash Severity (MyChart Feasibility), Hot ...  ...   \n",
+            "\n",
+            "                     Age         Phases Enrollment Funder Type  \\\n",
+            "0     ADULT, OLDER_ADULT            NaN         40               \n",
+            "1     ADULT, OLDER_ADULT            NaN        100               \n",
+            "2           CHILD, ADULT  PHASE1|PHASE2         24               \n",
+            "3     ADULT, OLDER_ADULT         PHASE1          3    industry   \n",
+            "4     ADULT, OLDER_ADULT            NaN          6    industry   \n",
+            "...                  ...            ...        ...         ...   \n",
+            "3098               ADULT            NaN        108               \n",
+            "3099  ADULT, OLDER_ADULT         PHASE4          9               \n",
+            "3100  ADULT, OLDER_ADULT            NaN         36    industry   \n",
+            "3101  ADULT, OLDER_ADULT            NaN         19               \n",
+            "3102  ADULT, OLDER_ADULT         PHASE4         56               \n",
+            "\n",
+            "          Study Type                                       Study Design  \\\n",
+            "0     interventional  allocation naintervention model single_groupma...   \n",
+            "1     interventional  allocation randomizedintervention model parall...   \n",
+            "2     interventional  allocation non_randomizedintervention model pa...   \n",
+            "3     interventional  allocation naintervention model single_groupma...   \n",
+            "4     interventional  allocation naintervention model single_groupma...   \n",
+            "...              ...                                                ...   \n",
+            "3098  interventional  allocation randomizedintervention model parall...   \n",
+            "3099  interventional  allocation naintervention model single_groupma...   \n",
+            "3100  interventional  allocation randomizedintervention model crosso...   \n",
+            "3101  interventional  allocation naintervention model single_groupma...   \n",
+            "3102  interventional  allocation naintervention model single_groupma...   \n",
+            "\n",
+            "                    Other IDs  \\\n",
+            "0                  USAH-EH301   \n",
+            "1                 A-CGyD-2017   \n",
+            "2                   DOXYAPG18   \n",
+            "3                    VC02-102   \n",
+            "4         MOT-TELE-2020-04-00   \n",
+            "...                       ...   \n",
+            "3098            JS-CT-2016-14   \n",
+            "3099                16-AOI-02   \n",
+            "3100              5'000'750-1   \n",
+            "3101               1609017536   \n",
+            "3102  REaCT-Hot Flashes Pilot   \n",
+            "\n",
+            "                                              Locations  \\\n",
+            "0     university south alabama mobile alabama 36604 ...   \n",
+            "1               ramon cajal hospital madrid 28034 spain   \n",
+            "2                       benrachadi latifa rabat morocco   \n",
+            "3            university alberta edmonton alberta canada   \n",
+            "4     moterum technologies inc study location homes ...   \n",
+            "...                                                 ...   \n",
+            "3098  bucheon jaseng hospital korean medicine bucheo...   \n",
+            "3099                      chu de nice nice 06003 france   \n",
+            "3100  northumbria university newcastle upon tyne tyn...   \n",
+            "3101  weill cornell medical college new york new yor...   \n",
+            "3102  ottawa hospital cancer centre ottawa ontario c...   \n",
+            "\n",
+            "                                        Study Documents  \\\n",
+            "0                                                   NaN   \n",
+            "1     Study Protocol and Statistical Analysis Plan, ...   \n",
+            "2                                                   NaN   \n",
+            "3                                                   NaN   \n",
+            "4                                                   NaN   \n",
+            "...                                                 ...   \n",
+            "3098                                                NaN   \n",
+            "3099                                                NaN   \n",
+            "3100                                                NaN   \n",
+            "3101                                                NaN   \n",
+            "3102                                                NaN   \n",
+            "\n",
+            "     Time taken for Enrollment  \n",
+            "0                            3  \n",
+            "1                           27  \n",
+            "2                            5  \n",
+            "3                            7  \n",
+            "4                            8  \n",
+            "...                        ...  \n",
+            "3098                        10  \n",
+            "3099                        20  \n",
+            "3100                         6  \n",
+            "3101                        12  \n",
+            "3102                         5  \n",
+            "\n",
+            "[3103 rows x 24 columns]>\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import nltk\n",
+        "nltk.download('wordnet') # Download the wordnet dataset\n",
+        "\n",
+        "from nltk.corpus import wordnet\n",
+        "\n",
+        "# ... (Rest of your code)\n",
+        "\n",
+        "# Function to expand synonyms for a given word\n",
+        "def get_synonyms(word):\n",
+        "    synonyms = set()\n",
+        "    for syn in wordnet.synsets(word):\n",
+        "        for lemma in syn.lemmas():\n",
+        "            synonyms.add(lemma.name().replace('_', ' '))\n",
+        "    return list(synonyms)\n",
+        "\n",
+        "# Function to expand synonyms in a text\n",
+        "def synonym_expansion(text):\n",
+        "    if pd.isnull(text):\n",
+        "        return text  # Skip processing for NaN values\n",
+        "    words = text.split()  # Split the cleaned text into words\n",
+        "    expanded_text = []\n",
+        "    for word in words:\n",
+        "        expanded_text.append(word)  # Add the original word\n",
+        "        expanded_text.extend(get_synonyms(word))  # Add synonyms\n",
+        "    return ' '.join(set(expanded_text))  # Deduplicate and join back to a string\n",
+        "\n",
+        "# Apply synonym expansion to the same text columns\n",
+        "for column in text_columns:\n",
+        "    if column in df.columns:\n",
+        "        df[column] = df[column].apply(synonym_expansion)\n",
+        "\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "VxCT0bvfQJF6",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "8997c388-c278-40cb-9d6a-557b1fcea3a6"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "[nltk_data] Downloading package wordnet to /root/nltk_data...\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.preprocessing import LabelEncoder\n",
+        "\n",
+        "# List of columns to label encode\n",
+        "label_columns = ['Sex', 'Age', 'Phases', 'Enrollment']\n",
+        "\n",
+        "# Initialize a dictionary to store encoders for each column (optional: for inverse transformations later)\n",
+        "label_encoders = {}\n",
+        "\n",
+        "# Apply label encoding to each specified column\n",
+        "for column in label_columns:\n",
+        "    if column in df.columns:\n",
+        "        le = LabelEncoder()\n",
+        "        df[column] = le.fit_transform(df[column].astype(str))  # Ensure all data is treated as string\n",
+        "        label_encoders[column] = le"
+      ],
+      "metadata": {
+        "id": "tBlxiVGRQMwy"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "columns_to_check = [\n",
+        "    'Sex', 'Age', 'Phases', 'Enrollment', 'Study Title', 'Study Status', 'Brief Summary',\n",
+        "    'Study Results', 'Conditions', 'Interventions', 'Primary Outcome Measures',\n",
+        "    'Funder Type', 'Study Type', 'Study Design', 'Locations', 'Time taken for Enrollment'\n",
+        "]\n",
+        "\n",
+        "# Filter rows where all specified columns have non-null values\n",
+        "filtered_df = df.dropna(subset=columns_to_check)\n",
+        "\n",
+        "# Select the top 1000 rows\n",
+        "top_1000_rows = filtered_df.head(1000)"
+      ],
+      "metadata": {
+        "id": "LFnloowZQPqa"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from transformers import AutoTokenizer, AutoModel\n",
+        "import torch\n",
+        "\n",
+        "# Load the PubMedBERT model and tokenizer\n",
+        "model_name = \"microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext\"\n",
+        "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+        "model = AutoModel.from_pretrained(model_name)\n",
+        "\n",
+        "# Function to generate embeddings\n",
+        "def generate_embedding(text, tokenizer, model):\n",
+        "    if pd.isna(text) or text.strip() == \"\":  # Handle empty or NaN values\n",
+        "        return torch.zeros(768).tolist()  # Return a zero vector if the text is empty\n",
+        "    inputs = tokenizer(text, return_tensors=\"pt\", truncation=True, max_length=512, padding=True)\n",
+        "    with torch.no_grad():\n",
+        "        outputs = model(**inputs)\n",
+        "    return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()  # Mean pooling of embeddings\n",
+        "\n",
+        "# Columns to process\n",
+        "text_columns = [\n",
+        "    'Study Title', 'Study Status', 'Brief Summary', 'Study Results',\n",
+        "    'Conditions', 'Interventions', 'Primary Outcome Measures',\n",
+        "    'Funder Type', 'Study Type', 'Study Design', 'Locations'\n",
+        "]\n",
+        "\n",
+        "# Generate embeddings for each column using PubMedBERT\n",
+        "for col in text_columns:\n",
+        "    print(f\"Generating embeddings for column: {col} using PubMedBERT\")\n",
+        "    top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n",
+        "        lambda x: generate_embedding(x, tokenizer, model)\n",
+        "    )\n"
+      ],
+      "metadata": {
+        "id": "WB6AGY7uQSIR",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000,
+          "referenced_widgets": [
+            "e41925970dc94c2aa90b4da8acac85cf",
+            "c1f0f99a4a4341d18e0039a71b305111",
+            "d54d01c91e694b3eb180170cddd1211c",
+            "3c707baf63cf4cbf9732e39ddeb84c7c",
+            "08955c157996426eaa089d878b8d820f",
+            "da9c8ff09bed4c6989b0246c7bdc8fd7",
+            "b78266bbcc0e4ed0bcaab9009b30d98e",
+            "2fd8c28ebf1b47f78e2e0093608407cf",
+            "8c728d06a0814cc8b83931b24f0115d9",
+            "da4b0713ce9f47208a39ec89515c30e4",
+            "b2fd795e6a534319a87213f05e824315",
+            "0722a8ceb94345a79e5bdc3b84f71d7f",
+            "7c04cff87d1041b1878488f5014f9b7c",
+            "0ef4539514a1462caacf58e5c3fa516b",
+            "4c61a2b37c4c4ffbb3a10d270140724f",
+            "d1288d4268e74bd384064c4f2cfbfa7b",
+            "1c84ce5cb56a4c7286c0f5981b8fbd5a",
+            "61a01f78b9714e4aa0d869d8e6563e81",
+            "180753d93be944e98c47bba836397ba0",
+            "db137743dfc24b6b84c6c15bcd5b5766",
+            "16f88feb88fc43b5b381521d91b02150",
+            "9b2a4006744a4940aa64fd9e41d62d4e",
+            "d55048b2772b47eab27637e9aa42d4dc",
+            "a70574b040b14832abb25d2eb040fb0a",
+            "e5c3bf5ade9a405094a1673bc2beb795",
+            "3d7c0e5ba179442eb417435ff72ae85a",
+            "b8aea249ccf04a619c1e926e7b6b6bbe",
+            "73554830b9524f1b8b8714ff33c3b8e8",
+            "15bf509be6d946e187e14fd6854950a9",
+            "65855073adcd4434b8e9af65ecc59915",
+            "f3485f93a7374df49cc200d18e9ac49e",
+            "53d6422031bf48dab34a3b47e237da93",
+            "1ebb572142e8490199aa8ffce39df341",
+            "efe460065f0448acbf75200543187bb0",
+            "0e6e8bc125874564bb46431ac85524dc",
+            "30d5690beb7341dc8b37de5a1e0812ab",
+            "e8c76754ea784709bae0c59843c61279",
+            "67690c975154491b96ed9091db152a0f",
+            "74bbdf59962d41059f018e6eab297968",
+            "2c20eb99653d4026ace81ccbe858b479",
+            "183aee869ce34eaca26f1cf533559caa",
+            "d4622e6a3301483c91a66836d5724e0e",
+            "d9c5d0256f534e4cbd82797e1e9f3ccd",
+            "0dc7017260764c9dbdcb9167e7a40b15"
+          ]
+        },
+        "outputId": "4ca695b1-5a35-4c3d-d856-5e6450e388a2"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
+            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+            "You will be able to reuse this secret in all of your notebooks.\n",
+            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+            "  warnings.warn(\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "e41925970dc94c2aa90b4da8acac85cf"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "0722a8ceb94345a79e5bdc3b84f71d7f"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "d55048b2772b47eab27637e9aa42d4dc"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "efe460065f0448acbf75200543187bb0"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Study Title using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Study Status using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Brief Summary using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Study Results using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Conditions using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Interventions using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Primary Outcome Measures using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Funder Type using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Study Type using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Study Design using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Generating embeddings for column: Locations using PubMedBERT\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-11-7092d7125584>:28: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"{col}_embedding\"] = top_1000_rows[col].apply(\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install umap-learn"
+      ],
+      "metadata": {
+        "id": "gfPr4CcMQWMx",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "094e9427-18ef-40b5-a216-afd5416b4b69"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Collecting umap-learn\n",
+            "  Downloading umap_learn-0.5.7-py3-none-any.whl.metadata (21 kB)\n",
+            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (1.26.4)\n",
+            "Requirement already satisfied: scipy>=1.3.1 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (1.13.1)\n",
+            "Requirement already satisfied: scikit-learn>=0.22 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (1.6.0)\n",
+            "Requirement already satisfied: numba>=0.51.2 in /usr/local/lib/python3.10/dist-packages (from umap-learn) (0.60.0)\n",
+            "Collecting pynndescent>=0.5 (from umap-learn)\n",
+            "  Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)\n",
+            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from umap-learn) (4.67.1)\n",
+            "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.2->umap-learn) (0.43.0)\n",
+            "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.10/dist-packages (from pynndescent>=0.5->umap-learn) (1.4.2)\n",
+            "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.22->umap-learn) (3.5.0)\n",
+            "Downloading umap_learn-0.5.7-py3-none-any.whl (88 kB)\n",
+            "\u001b[?25l   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/88.8 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m88.8/88.8 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)\n",
+            "\u001b[?25l   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/56.9 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hInstalling collected packages: pynndescent, umap-learn\n",
+            "Successfully installed pynndescent-0.5.13 umap-learn-0.5.7\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn.manifold import TSNE\n",
+        "import umap\n",
+        "import matplotlib.pyplot as plt\n",
+        "from sklearn.metrics import pairwise_distances\n",
+        "import numpy as np\n",
+        "\n",
+        "# Combine all embedding columns into a single DataFrame\n",
+        "embedding_columns = [f\"{col}_embedding\" for col in [\n",
+        "    'Study Title', 'Study Status', 'Brief Summary', 'Study Results',\n",
+        "    'Conditions', 'Interventions', 'Primary Outcome Measures',\n",
+        "    'Funder Type', 'Study Type', 'Study Design', 'Locations'\n",
+        "]]\n",
+        "\n",
+        "# Flatten embeddings into a single numpy array\n",
+        "embedding_data = np.vstack(\n",
+        "    top_1000_rows[embedding_columns].apply(\n",
+        "        lambda row: np.concatenate(row.values), axis=1\n",
+        "    )\n",
+        ")\n",
+        "\n",
+        "# Use pairwise distances for elbow method\n",
+        "distances = pairwise_distances(embedding_data, metric='euclidean')\n",
+        "\n",
+        "# Find optimal dimensions for UMAP using the elbow method\n",
+        "inertia = []\n",
+        "dimensions = range(2, 50)  # Test dimensions from 2 to 50\n",
+        "for dim in dimensions:\n",
+        "    umap_reducer = umap.UMAP(n_components=dim, random_state=42)\n",
+        "    transformed = umap_reducer.fit_transform(embedding_data)\n",
+        "    inertia.append(np.sum(np.var(transformed, axis=0)))  # Variance in reduced dimensions\n",
+        "\n",
+        "# Plot the elbow curve\n",
+        "plt.figure(figsize=(8, 5))\n",
+        "plt.plot(dimensions, inertia, marker='o')\n",
+        "plt.title(\"Elbow Method for UMAP Dimension Reduction\")\n",
+        "plt.xlabel(\"Number of Components\")\n",
+        "plt.ylabel(\"Sum of Variance\")\n",
+        "plt.grid()\n",
+        "plt.show()"
+      ],
+      "metadata": {
+        "id": "g6Jg9xf1Vq8B",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "outputId": "1409e037-88f0-4038-d687-3ead90037e6a"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 800x500 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "optimal_dims = 40  # Example, replace with the value from the plot\n",
+        "\n",
+        "# Apply UMAP with optimal dimensions\n",
+        "umap_reducer = umap.UMAP(n_components=optimal_dims, random_state=42)\n",
+        "reduced_embeddings = umap_reducer.fit_transform(embedding_data)\n",
+        "\n",
+        "# Add reduced embeddings back to the DataFrame\n",
+        "for i in range(optimal_dims):\n",
+        "    top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]"
+      ],
+      "metadata": {
+        "id": "XZbYj6Eqp9Hf",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "54e88763-5ff2-401e-e6b1-e6d3f6d1705c"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.\n",
+            "  warn(\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n",
+            "<ipython-input-14-de94449a2449>:9: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  top_1000_rows[f\"UMAP_{i+1}\"] = reduced_embeddings[:, i]\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
+        "from sklearn.metrics import mean_squared_error\n",
+        "import tensorflow as tf\n",
+        "from tensorflow.keras.models import Sequential\n",
+        "from tensorflow.keras.layers import SimpleRNN, Dense, Dropout\n",
+        "from tensorflow.keras.optimizers import Adam\n",
+        "from sklearn.compose import ColumnTransformer\n",
+        "from sklearn.pipeline import Pipeline\n",
+        "\n",
+        "# Step 1: Prepare features and target\n",
+        "# Select relevant columns\n",
+        "input_columns = [\n",
+        "    'Sex', 'Age', 'Phases', 'Enrollment',\n",
+        "    'Study Title', 'Study Status', 'Brief Summary', 'Study Results',\n",
+        "    'Conditions', 'Interventions', 'Primary Outcome Measures',\n",
+        "    'Funder Type', 'Study Type', 'Study Design'\n",
+        "]\n",
+        "\n",
+        "target_column = 'Time taken for Enrollment'\n",
+        "\n",
+        "# Ensure the columns exist in the dataframe after UMAP processing\n",
+        "input_data = top_1000_rows[input_columns]\n",
+        "target_data = top_1000_rows[target_column]\n",
+        "\n",
+        "# Step 2: Preprocess data\n",
+        "# Handling categorical features using one-hot encoding and scaling\n",
+        "# Columns for one-hot encoding\n",
+        "categorical_columns = ['Sex', 'Age', 'Phases', 'Enrollment']\n",
+        "\n",
+        "# Scale numerical features (UMAP embeddings)\n",
+        "scaler = StandardScaler()\n",
+        "\n",
+        "# Identify numerical columns (excluding categorical and text columns)\n",
+        "numerical_columns = [col for col in input_columns if col not in categorical_columns and col not in text_columns]\n",
+        "\n",
+        "# Ensure all columns are of numeric type before applying preprocessing\n",
+        "# Convert to numeric, replacing non-numeric values with NaN\n",
+        "for col in categorical_columns + numerical_columns:\n",
+        "    input_data[col] = pd.to_numeric(input_data[col], errors='coerce')\n",
+        "\n",
+        "# Impute NaN values if any (you can use a different strategy if needed)\n",
+        "input_data = input_data.fillna(0)\n",
+        "\n",
+        "# Column transformer to apply one-hot encoding to categorical columns and scaling to numerical columns\n",
+        "preprocessor = ColumnTransformer(\n",
+        "    transformers=[\n",
+        "        ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_columns),\n",
+        "        ('num', scaler, numerical_columns)\n",
+        "    ])\n",
+        "\n",
+        "# Apply the preprocessor to the input data\n",
+        "X = preprocessor.fit_transform(input_data)\n",
+        "\n",
+        "# For the target, make sure it's in a numeric format\n",
+        "y = target_data.values\n",
+        "y = pd.to_numeric(y, errors='coerce')\n",
+        "\n",
+        "# Impute NaN values if any (you can use a different strategy if needed)\n",
+        "y = np.nan_to_num(y)  # Replace NaN with 0\n",
+        "# --- End of the code to insert ---\n",
+        "\n",
+        "\n",
+        "# Step 3: Split data into training and testing sets (80:20)\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
+      ],
+      "metadata": {
+        "id": "mm6YOeG_p-I0",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "eda3e0ca-7be4-454f-c7cf-3705c158c2dc"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-26-207c951d05e0>:42: SettingWithCopyWarning: \n",
+            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+            "Try using .loc[row_indexer,col_indexer] = value instead\n",
+            "\n",
+            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+            "  input_data[col] = pd.to_numeric(input_data[col], errors='coerce')\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from tensorflow.keras.layers import Bidirectional, LSTM\n",
+        "\n",
+        "# Step 4: Build the BiLSTM model\n",
+        "bi_lstm_model = Sequential()\n",
+        "\n",
+        "# Input layer: BiLSTM expects 3D input (samples, time steps, features)\n",
+        "# Reshaping input to (samples, time steps, features)\n",
+        "bi_lstm_model.add(tf.keras.layers.Reshape((1, X_train.shape[1]), input_shape=(X_train.shape[1],)))\n",
+        "\n",
+        "# BiLSTM layer with 64 units\n",
+        "bi_lstm_model.add(Bidirectional(LSTM(64, activation='relu', return_sequences=False)))\n",
+        "\n",
+        "# Dropout layer to prevent overfitting\n",
+        "bi_lstm_model.add(Dropout(0.2))\n",
+        "\n",
+        "# Output layer: predicting a continuous value (Time taken for Enrollment)\n",
+        "bi_lstm_model.add(Dense(1))\n",
+        "\n",
+        "# Step 5: Compile the BiLSTM model\n",
+        "bi_lstm_model.compile(optimizer=Adam(), loss='mean_squared_error')\n",
+        "\n",
+        "# Step 6: Train the BiLSTM model\n",
+        "bi_lstm_history = bi_lstm_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))\n",
+        "\n",
+        "# Step 7: Make predictions with the BiLSTM model and evaluate the RMSE\n",
+        "y_pred_bilstm = bi_lstm_model.predict(X_test)\n",
+        "\n",
+        "# Calculate RMSE (Root Mean Squared Error) for BiLSTM model\n",
+        "rmse_bilstm = np.sqrt(mean_squared_error(y_test, y_pred_bilstm))\n",
+        "print(f\"RMSE for BiLSTM: {rmse_bilstm}\")\n"
+      ],
+      "metadata": {
+        "id": "0MpM2NkTqGYe",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "ed87b58e-5718-4f35-da94-a8a36b3648f0"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/keras/src/layers/reshaping/reshape.py:39: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
+            "  super().__init__(**kwargs)\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 36ms/step - loss: 667.7067 - val_loss: 609.3492\n",
+            "Epoch 2/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 11ms/step - loss: 617.5209 - val_loss: 591.2498\n",
+            "Epoch 3/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 9ms/step - loss: 561.6722 - val_loss: 557.8972\n",
+            "Epoch 4/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 10ms/step - loss: 528.3793 - val_loss: 507.4259\n",
+            "Epoch 5/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 15ms/step - loss: 455.1757 - val_loss: 443.5311\n",
+            "Epoch 6/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 17ms/step - loss: 386.5001 - val_loss: 378.3217\n",
+            "Epoch 7/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 22ms/step - loss: 335.8933 - val_loss: 329.0683\n",
+            "Epoch 8/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - loss: 278.2339 - val_loss: 300.2128\n",
+            "Epoch 9/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 21ms/step - loss: 305.0649 - val_loss: 287.5830\n",
+            "Epoch 10/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 20ms/step - loss: 273.1591 - val_loss: 280.5639\n",
+            "Epoch 11/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - loss: 234.1577 - val_loss: 276.5512\n",
+            "Epoch 12/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 22ms/step - loss: 235.5674 - val_loss: 273.2498\n",
+            "Epoch 13/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 216.2296 - val_loss: 270.5114\n",
+            "Epoch 14/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - loss: 216.4830 - val_loss: 268.1384\n",
+            "Epoch 15/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - loss: 241.6802 - val_loss: 266.0169\n",
+            "Epoch 16/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 19ms/step - loss: 197.7046 - val_loss: 264.7904\n",
+            "Epoch 17/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 19ms/step - loss: 254.8440 - val_loss: 263.2259\n",
+            "Epoch 18/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 22ms/step - loss: 217.4115 - val_loss: 262.4696\n",
+            "Epoch 19/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 10ms/step - loss: 220.9622 - val_loss: 261.7163\n",
+            "Epoch 20/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 19ms/step - loss: 203.3700 - val_loss: 261.1432\n",
+            "Epoch 21/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 9ms/step - loss: 204.3532 - val_loss: 260.6401\n",
+            "Epoch 22/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 11ms/step - loss: 206.5609 - val_loss: 260.3468\n",
+            "Epoch 23/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 10ms/step - loss: 200.0678 - val_loss: 259.9577\n",
+            "Epoch 24/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 10ms/step - loss: 205.6685 - val_loss: 259.8051\n",
+            "Epoch 25/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 13ms/step - loss: 200.1108 - val_loss: 259.6185\n",
+            "Epoch 26/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 12ms/step - loss: 190.1116 - val_loss: 259.4397\n",
+            "Epoch 27/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - loss: 188.8570 - val_loss: 259.6147\n",
+            "Epoch 28/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 12ms/step - loss: 192.8143 - val_loss: 260.2402\n",
+            "Epoch 29/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 10ms/step - loss: 173.7915 - val_loss: 260.1749\n",
+            "Epoch 30/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 12ms/step - loss: 184.3625 - val_loss: 260.5374\n",
+            "Epoch 31/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 11ms/step - loss: 171.2388 - val_loss: 260.4700\n",
+            "Epoch 32/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 14ms/step - loss: 178.1833 - val_loss: 261.1990\n",
+            "Epoch 33/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 13ms/step - loss: 178.0006 - val_loss: 261.4209\n",
+            "Epoch 34/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 13ms/step - loss: 187.6684 - val_loss: 262.2680\n",
+            "Epoch 35/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 16ms/step - loss: 159.2341 - val_loss: 262.3969\n",
+            "Epoch 36/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 13ms/step - loss: 160.0466 - val_loss: 262.5146\n",
+            "Epoch 37/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 176.0917 - val_loss: 263.1417\n",
+            "Epoch 38/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 16ms/step - loss: 151.0455 - val_loss: 263.6153\n",
+            "Epoch 39/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 24ms/step - loss: 146.4944 - val_loss: 264.1271\n",
+            "Epoch 40/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 19ms/step - loss: 165.9863 - val_loss: 264.9947\n",
+            "Epoch 41/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 17ms/step - loss: 153.2215 - val_loss: 265.0475\n",
+            "Epoch 42/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 18ms/step - loss: 154.0866 - val_loss: 265.6365\n",
+            "Epoch 43/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 26ms/step - loss: 147.2036 - val_loss: 266.3539\n",
+            "Epoch 44/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 18ms/step - loss: 175.0763 - val_loss: 266.5692\n",
+            "Epoch 45/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 16ms/step - loss: 175.0344 - val_loss: 267.5154\n",
+            "Epoch 46/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - loss: 154.6448 - val_loss: 267.3886\n",
+            "Epoch 47/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 19ms/step - loss: 145.1259 - val_loss: 267.1888\n",
+            "Epoch 48/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 20ms/step - loss: 160.2450 - val_loss: 267.4206\n",
+            "Epoch 49/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 15ms/step - loss: 144.1925 - val_loss: 267.6102\n",
+            "Epoch 50/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 13ms/step - loss: 144.6609 - val_loss: 268.0196\n",
+            "\u001b[1m7/7\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 51ms/step\n",
+            "RMSE for BiLSTM: 16.37130237010102\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model = Sequential()\n",
+        "\n",
+        "# Input layer: RNN expects 3D input (samples, time steps, features)\n",
+        "# Here, we'll use a single time step (1) for each sample (flatten the data to time steps)\n",
+        "model.add(tf.keras.layers.Reshape((1, X_train.shape[1]), input_shape=(X_train.shape[1],)))\n",
+        "\n",
+        "# RNN layer with 64 units\n",
+        "model.add(SimpleRNN(64, activation='relu', return_sequences=False))\n",
+        "\n",
+        "# Dropout layer to prevent overfitting\n",
+        "model.add(Dropout(0.3))\n",
+        "\n",
+        "# Output layer: predicting a continuous value (Time taken for Enrollment)\n",
+        "model.add(Dense(1))\n",
+        "\n",
+        "# Step 5: Compile the model\n",
+        "model.compile(optimizer=Adam(), loss='mean_squared_error')\n",
+        "\n",
+        "# Step 6: Train the model\n",
+        "history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))\n",
+        "\n",
+        "# Step 7: Make predictions and evaluate the model\n",
+        "y_pred = model.predict(X_test)\n",
+        "\n",
+        "# Calculate RMSE (Root Mean Squared Error)\n",
+        "rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n",
+        "print(f\"RMSE: {rmse}\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "A6JHsy5sBjRs",
+        "outputId": "997929af-bf12-4569-90ba-779b22ada812"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/keras/src/layers/reshaping/reshape.py:39: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n",
+            "  super().__init__(**kwargs)\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 26ms/step - loss: 579.7545 - val_loss: 598.7670\n",
+            "Epoch 2/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 541.1518 - val_loss: 571.0222\n",
+            "Epoch 3/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 532.0207 - val_loss: 529.2407\n",
+            "Epoch 4/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 454.2565 - val_loss: 477.3809\n",
+            "Epoch 5/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 415.7916 - val_loss: 424.5004\n",
+            "Epoch 6/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 452.6029 - val_loss: 376.9656\n",
+            "Epoch 7/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 358.6606 - val_loss: 340.9701\n",
+            "Epoch 8/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 310.2562 - val_loss: 314.1212\n",
+            "Epoch 9/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 305.6849 - val_loss: 297.5801\n",
+            "Epoch 10/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 9ms/step - loss: 249.7126 - val_loss: 288.5488\n",
+            "Epoch 11/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step - loss: 230.8337 - val_loss: 281.9182\n",
+            "Epoch 12/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 264.7994 - val_loss: 277.4290\n",
+            "Epoch 13/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 12ms/step - loss: 230.6227 - val_loss: 274.1347\n",
+            "Epoch 14/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 224.2143 - val_loss: 271.3199\n",
+            "Epoch 15/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 218.0156 - val_loss: 269.0135\n",
+            "Epoch 16/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 235.3152 - val_loss: 266.9712\n",
+            "Epoch 17/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 251.2175 - val_loss: 265.0502\n",
+            "Epoch 18/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 251.1464 - val_loss: 263.4379\n",
+            "Epoch 19/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 220.6263 - val_loss: 261.9560\n",
+            "Epoch 20/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 232.6349 - val_loss: 260.8833\n",
+            "Epoch 21/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 230.8253 - val_loss: 259.8926\n",
+            "Epoch 22/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 226.6449 - val_loss: 258.7480\n",
+            "Epoch 23/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 211.2643 - val_loss: 258.2128\n",
+            "Epoch 24/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 204.0192 - val_loss: 257.8778\n",
+            "Epoch 25/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 246.8270 - val_loss: 257.0885\n",
+            "Epoch 26/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 221.6270 - val_loss: 256.7341\n",
+            "Epoch 27/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 190.9916 - val_loss: 256.3143\n",
+            "Epoch 28/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 205.6945 - val_loss: 256.1562\n",
+            "Epoch 29/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 238.1927 - val_loss: 255.8900\n",
+            "Epoch 30/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 199.9858 - val_loss: 255.9863\n",
+            "Epoch 31/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 218.4140 - val_loss: 256.0137\n",
+            "Epoch 32/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 8ms/step - loss: 233.9201 - val_loss: 255.7076\n",
+            "Epoch 33/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 216.9118 - val_loss: 256.2441\n",
+            "Epoch 34/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 11ms/step - loss: 214.1853 - val_loss: 255.9830\n",
+            "Epoch 35/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 9ms/step - loss: 216.3486 - val_loss: 256.2111\n",
+            "Epoch 36/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 10ms/step - loss: 175.9630 - val_loss: 256.3767\n",
+            "Epoch 37/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 9ms/step - loss: 204.1494 - val_loss: 256.2661\n",
+            "Epoch 38/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 6ms/step - loss: 205.1333 - val_loss: 256.2394\n",
+            "Epoch 39/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 194.3962 - val_loss: 256.8536\n",
+            "Epoch 40/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 8ms/step - loss: 192.2582 - val_loss: 257.1403\n",
+            "Epoch 41/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 200.7191 - val_loss: 257.0217\n",
+            "Epoch 42/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 179.5457 - val_loss: 257.5062\n",
+            "Epoch 43/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 182.3336 - val_loss: 257.6974\n",
+            "Epoch 44/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 186.9329 - val_loss: 258.0083\n",
+            "Epoch 45/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 175.0525 - val_loss: 257.9814\n",
+            "Epoch 46/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 208.5019 - val_loss: 258.1638\n",
+            "Epoch 47/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 7ms/step - loss: 204.7254 - val_loss: 258.5562\n",
+            "Epoch 48/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 6ms/step - loss: 185.3362 - val_loss: 258.8849\n",
+            "Epoch 49/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 5ms/step - loss: 185.8382 - val_loss: 259.3331\n",
+            "Epoch 50/50\n",
+            "\u001b[1m25/25\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 4ms/step - loss: 179.8962 - val_loss: 259.4713\n",
+            "\u001b[1m7/7\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 24ms/step\n",
+            "RMSE: 16.108112997784122\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "cB3wFHSgCFjN"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file