[349d16]: / code / data_preprocessing / Create_Embeddings.ipynb

Download this file

517 lines (517 with data), 45.2 kB

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Create Embeddings.ipynb",
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "djlQlzUZP0DW"
      },
      "source": [
        "## **Using pre-trained Word2Vec vectors on Biomedical text**"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "KkqB6AMOPxAg"
      },
      "source": [
        "## Mounting the drive"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "6j1ETQPyQ7Xj"
      },
      "source": [
        "Run the cell below, open the authorization link it prints, and allow Colab to access your Drive. You will then be given an authorization code: copy it and paste it into the input box shown under the cell to mount your Google Drive."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 129
        },
        "colab_type": "code",
        "id": "aD0mYooYVqT6",
        "outputId": "fa464e79-ca32-48b0-8b1c-b9d6244a1815"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n",
            "\n",
            "Enter your authorization code:\n",
            "··········\n",
            "Mounted at /content/drive\n"
          ]
        }
      ],
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "Wl1P7f7VPkie"
      },
      "source": [
        "## Installing Gensim to import pre-trained Word2Vec binary file"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 349
        },
        "colab_type": "code",
        "id": "vVDe2k5QPBx2",
        "outputId": "5906f90c-e55f-4b57-8ae0-66a33d369147"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: gensim in /usr/local/lib/python3.6/dist-packages (3.6.0)\n",
            "Requirement already satisfied: six>=1.5.0 in /usr/local/lib/python3.6/dist-packages (from gensim) (1.12.0)\n",
            "Requirement already satisfied: scipy>=0.18.1 in /usr/local/lib/python3.6/dist-packages (from gensim) (1.3.2)\n",
            "Requirement already satisfied: numpy>=1.11.3 in /usr/local/lib/python3.6/dist-packages (from gensim) (1.17.4)\n",
            "Requirement already satisfied: smart-open>=1.2.1 in /usr/local/lib/python3.6/dist-packages (from gensim) (1.9.0)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from smart-open>=1.2.1->gensim) (2.21.0)\n",
            "Requirement already satisfied: boto>=2.32 in /usr/local/lib/python3.6/dist-packages (from smart-open>=1.2.1->gensim) (2.49.0)\n",
            "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from smart-open>=1.2.1->gensim) (1.10.18)\n",
            "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->smart-open>=1.2.1->gensim) (2.8)\n",
            "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->smart-open>=1.2.1->gensim) (3.0.4)\n",
            "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->smart-open>=1.2.1->gensim) (1.24.3)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->smart-open>=1.2.1->gensim) (2019.9.11)\n",
            "Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from boto3->smart-open>=1.2.1->gensim) (0.2.1)\n",
            "Requirement already satisfied: botocore<1.14.0,>=1.13.18 in /usr/local/lib/python3.6/dist-packages (from boto3->smart-open>=1.2.1->gensim) (1.13.18)\n",
            "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->smart-open>=1.2.1->gensim) (0.9.4)\n",
            "Requirement already satisfied: python-dateutil<2.8.1,>=2.1; python_version >= \"2.7\" in /usr/local/lib/python3.6/dist-packages (from botocore<1.14.0,>=1.13.18->boto3->smart-open>=1.2.1->gensim) (2.6.1)\n",
            "Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.14.0,>=1.13.18->boto3->smart-open>=1.2.1->gensim) (0.15.2)\n"
          ]
        }
      ],
      "source": [
        "pip install gensim"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {},
        "colab_type": "code",
        "id": "mck8PUXWPF5R"
      },
      "outputs": [],
      "source": [
        "import pickle\n",
        "import random\n",
        "import numpy as np\n",
        "from gensim.models.keyedvectors import KeyedVectors"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "QplIXe-iPLqv"
      },
      "source": [
        "## Word2Vec Embeddings path"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {},
        "colab_type": "code",
        "id": "7xn0BkbkNorg"
      },
      "outputs": [],
      "source": [
        "word2vec_path_bin = '/media/ramkabir/PC Data/ASU Data/Semester 3/BMNLP/Projects/medical_data/embeddings/bio_embedding_extrinsic'\n",
        "word2vec_path_txt = '/media/ramkabir/PC Data/ASU Data/Semester 3/BMNLP/Projects/medical_data/embeddings/bio_embedding_extrinsic.txt'\n",
        "word2vec_path_dictionary = '/media/ramkabir/PC Data/ASU Data/Semester 3/BMNLP/Projects/medical_data/embeddings/bio_embedding_dictionary.dat'\n",
        "word2vec_random_vec_path = '/media/ramkabir/PC Data/ASU Data/Semester 3/BMNLP/Projects/medical_data/embeddings/random_vec.dat'"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "NvG1uyfwPO3E"
      },
      "source": [
        "## Converting Word2Vec binary file into Text format"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 74
        },
        "colab_type": "code",
        "id": "i_935ScbPTyR",
        "outputId": "f177b66e-f629-4b61-b8f1-2a0e16bf5ec7"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.6/dist-packages/smart_open/smart_open_lib.py:402: UserWarning: This function is deprecated, use smart_open.open instead. See the migration notes for details: https://github.com/RaRe-Technologies/smart_open/blob/master/README.rst#migrating-to-the-new-open-function\n",
            "  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL\n"
          ]
        }
      ],
      "source": [
        "model = KeyedVectors.load_word2vec_format(word2vec_path_bin, binary=True)\n",
        "model.save_word2vec_format(word2vec_path_txt, binary=False)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "4hYPRBMaPawF"
      },
      "source": [
        "## Testing Word2Vec in Text format"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 404
        },
        "colab_type": "code",
        "id": "QG2R9-WJPdt5",
        "outputId": "6c2969d5-6567-4b6b-da2b-7fa95ca6ea6d"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "2324849 200\n",
            "\n",
            "the 0.02273408 0.21401943 -0.053885903 -0.13237028 0.12502918 0.16126284 -0.07451463 -0.14967322 -0.023728507 0.19733575 0.050005097 -0.08069371 -0.2260156 0.09950685 0.08872627 -0.003295694 0.121344924 0.12236745 0.046995945 -0.11031165 -0.0647292 -0.1103984 -0.3417561 0.0038654804 0.06961649 0.025639912 -0.13327652 0.337467 -0.017178265 -0.040492296 0.23432323 0.28060308 0.1356283 0.1227554 0.20075974 0.0047963345 -0.32055277 0.25563952 0.05774525 -0.14052501 -0.33290806 0.096904434 -0.1650102 -0.08489294 -0.08052749 0.1589681 -0.10933108 0.044205036 -0.17562449 -0.06108469 -0.44703272 -0.20632856 0.18085167 -0.02745544 -0.29225847 -0.24004638 0.083219714 0.22248244 -0.17233558 -0.08747911 -0.000398142 -0.24416988 0.06556866 0.09449897 -0.16092162 -0.12622572 -0.25597194 -0.1500615 0.11731122 -0.22604583 -0.025557904 0.032549977 0.14482148 -0.03889161 -0.07825571 0.09413212 0.15883411 0.034596562 -0.10479029 -0.07194895 -0.07308851 -0.032196343 0.36316708 0.12265507 0.052908786 -0.09343035 0.23356266 0.15282194 0.22533727 0.032841682 0.20543285 0.38537937 -0.30978137 0.05430988 0.06880429 -0.16253622 0.09465415 -0.06925405 -0.18896696 -0.029481905 0.14715491 0.08506173 0.27158326 -0.37324414 -0.46946794 -0.17569502 -0.08980528 0.035522606 -0.07862338 0.3208141 -0.2597154 -0.119839415 0.026192954 0.067445986 0.16163674 -0.042206783 -0.38949797 0.07648213 -0.16537796 0.016809925 0.16506003 0.32620525 0.095358156 0.005013975 0.0014261369 0.0017001033 -0.2673485 -0.17237446 0.3187142 -0.10715083 0.27657536 -0.343741 -0.20020838 0.012484201 0.08882111 -0.28091434 -0.020162731 0.36721274 -0.027041119 -0.010907327 -0.17451175 0.10702399 -0.2413658 0.15808347 -0.030567199 -0.14181466 -0.018535733 0.07842573 -0.019363267 -0.21801496 0.1989181 -0.15954545 -0.12181692 -0.2107519 -0.21235956 -0.23292848 0.006739301 0.0404175 0.09821297 0.022056874 -0.07894085 -0.08496858 0.32307485 0.10845722 0.011125122 -0.37773514 -0.03376975 -0.08987788 -0.123727985 
-0.001561399 0.058019247 -0.08377683 -0.22610198 -0.18241878 0.1534749 0.08795728 -0.1320252 -0.4023356 -0.21920754 0.1107396 0.23031846 -0.31160125 -0.023965504 -0.0037958962 0.038802225 0.06564514 0.0686663 -0.17646416 0.17312534 -0.04120373 0.07008852 0.20014112 -0.119300306 0.18958251 -0.08827216 0.19794598 -0.18330821 -0.14011972 0.10919319 0.13455747\n",
            "\n",
            "of 0.112247586 0.15358572 -0.09591484 -0.0608249 0.11896096 0.15540639 -0.15660456 -0.20304194 -0.018450469 0.14359745 -0.0046009496 -0.011727184 -0.20600139 0.19115274 0.09406891 0.031573385 0.08868867 -0.027285911 0.039658904 -0.04071243 -0.089013875 -0.05784092 -0.32782844 0.06695212 0.1281697 0.043196917 -0.15796041 0.37234855 -0.0067652464 0.01899217 0.27129802 0.33771396 0.26030347 0.08338694 0.16608532 -0.07076314 -0.27856457 0.21565469 0.11604118 -0.05268976 -0.4203546 0.033061862 -0.095556766 -0.14047194 -0.0979964 0.048481166 -0.06322959 0.08681105 -0.09874001 -0.09987119 -0.45547357 -0.17062595 0.123416394 0.0039383173 -0.26849192 -0.070658065 0.008982316 0.17147493 -0.10338548 -0.12545899 0.07671037 -0.19858089 0.11015336 0.05925609 -0.1866241 -0.145196 -0.3650959 -0.03170365 0.05697851 -0.16372813 -0.14302377 0.0955441 0.06512794 -0.030409187 -0.10604329 0.02038093 0.094831035 0.025560616 -0.091462374 -0.09604226 -0.14649662 -0.11669222 0.3003593 -0.05960603 0.07102513 0.0031536287 0.24042346 0.16587849 0.21404317 0.016933165 0.08306224 0.42685136 -0.27621853 0.08189513 0.08334793 -0.102525875 0.011885315 -0.113929264 -0.14254636 0.08452268 0.19125345 -0.00072386116 0.18702185 -0.31263143 -0.3901186 -0.08078607 -0.024802476 -0.04335107 -0.03178752 0.36371124 -0.23325107 -0.11624608 0.005041331 -0.08192334 -0.029087529 0.10549089 -0.450431 0.070917755 -0.12066687 0.014578275 0.19813585 0.3037796 0.10998475 -0.064364485 -0.026231676 -0.077966586 -0.33656114 -0.31138954 0.34601346 -0.031980403 0.15125188 -0.42520356 -0.15776272 -0.11383298 -0.030993633 -0.22382961 0.033448122 0.3659662 -0.015528411 -0.06917462 -0.12194459 0.00812158 -0.2640427 0.17969829 -0.038359612 -0.17199431 0.07300444 -0.034231856 -0.099781424 -0.13081457 0.2468158 -0.12311888 -0.16696137 -0.33632666 -0.16880459 -0.03135079 0.0007338971 -0.0012068152 0.20754582 -0.049254 -0.037421852 -0.07663785 0.33299962 0.052308828 0.031599194 -0.2200595 -0.06279315 0.013871025 
-0.08729389 -0.060639784 0.10078897 -0.2397276 -0.19482164 -0.32164133 0.18220182 0.09109442 -0.17176405 -0.3577575 -0.35992253 -0.060257643 0.13766569 -0.30787504 -0.05998431 -0.087761566 -0.077231094 0.22220808 0.16417472 -0.27105546 0.15766722 -0.06415099 0.018098757 0.19554472 -0.11793022 0.19664752 -0.27010787 0.14784685 -0.090002 -0.13160227 0.11471468 0.06527114\n",
            "\n",
            "and 0.2954278 0.10866095 0.039514206 -0.12807044 -0.03719324 0.12956138 -0.13437104 -0.25504574 0.0683655 0.12217373 0.021897238 -0.09952249 -0.19956146 0.10827263 -0.0038576636 -0.03654604 0.0808193 0.04097017 -0.0013400869 -0.10666085 -0.18388803 0.046822388 -0.20536058 -0.09571695 0.037540533 -0.07435882 -0.28002754 0.24672 -0.04948553 -0.14330873 0.17734316 0.25495276 0.1113497 0.068458915 0.34858614 0.19559419 -0.2531531 0.20233405 0.06896409 -0.08139057 -0.41791004 0.031309072 -0.15502937 -0.105013534 -0.12159044 0.10201652 -0.19323587 0.09129319 -0.21867265 -0.03375685 -0.5406527 -0.2245057 0.09568601 0.02580309 -0.17378142 -0.097645506 0.09806963 0.05948789 -0.15242802 -0.17918608 0.056974035 -0.14176355 0.05326218 0.12720692 -0.37827373 -0.18795773 -0.34084672 0.17531572 -0.13031898 -0.23481944 -0.2609509 0.16538146 0.020811541 -0.022311466 -0.20076169 0.04914267 0.023852477 0.12891662 -0.040374108 0.032431487 -0.23274414 0.0140903 0.37581497 -0.12684728 0.060313415 0.011575043 0.041978948 0.13933852 0.21536715 -0.024683764 0.12354567 0.36395726 -0.3630775 0.14778635 0.16783735 -0.08558737 0.1083083 -0.06290203 -0.08852609 0.14561622 0.24017456 0.017588181 0.25390777 -0.24487539 -0.25807348 -0.12901932 -0.028223762 -0.22830272 -0.008605361 0.27880672 -0.28866428 -0.17792003 -0.019774983 0.05865595 0.08366411 -0.015540698 -0.52938783 0.10631287 -0.24598898 -0.03218941 0.016047733 0.37122065 0.06158575 -0.11312414 0.062214594 0.06765672 -0.12650087 -0.26546612 0.17612733 -0.19017676 0.07542793 -0.32706967 -0.021291379 -0.09240149 -0.13880695 -0.26526427 -0.057828307 0.23518872 -0.1951798 0.032625727 -0.18436329 -0.10290183 -0.17786439 0.19817956 0.031871933 -0.08509521 -0.07347151 0.2316232 -0.02935103 -0.02220571 0.17913093 -0.19553055 -0.2970028 -0.26715773 -0.07300062 -0.0027157876 0.03115043 0.06681323 0.2391679 -0.048274823 -0.014128736 -0.1263169 0.24583025 -0.042214323 0.026274826 -0.23761602 0.03178886 -0.02575228 -0.06348231 0.16083224 
0.18212979 -0.31400654 -0.1248523 -0.3656392 0.16548565 -0.09743958 -0.108478956 -0.24333206 -0.2093376 0.06796154 0.039704777 -0.27935344 -0.098947324 -0.08068223 -0.12745015 0.18529007 0.24998106 -0.2129804 0.19328143 -0.07079298 -0.11581636 0.07032351 -0.12333764 0.26423004 -0.26182437 0.29612043 0.0037112746 -0.14159988 0.18385838 0.16955267\n",
            "\n",
            "in 0.26430684 0.07853405 0.021841273 0.059909694 0.023872733 0.09979558 -0.32387376 -0.31616172 0.20326576 0.056536727 -0.09302978 0.013212934 -0.24420044 0.14549518 -0.03069833 0.14639819 -0.12608723 0.0062408894 0.057508543 0.06952083 -0.1194478 0.038836174 -0.17133 -0.0014667176 -0.0114560425 -0.05928138 -0.20805848 0.3122653 -0.059815757 -0.015557617 0.37957096 0.29167697 -0.076629296 0.122038364 0.24012494 -0.06588212 -0.10847588 0.084207915 7.097423e-05 -0.19828469 -0.2690731 -0.1127072 -0.1775856 -0.022222295 -0.157022 -0.06283565 -0.22407073 0.24961264 -0.20417383 0.105300605 -0.5847929 -0.27360934 0.14611737 -0.112668954 -0.23778161 -0.015005171 -0.21955326 0.19934344 -0.2523028 -0.0020252839 0.111574695 -0.12213606 0.052636072 0.2443158 -0.13177454 -0.074640766 -0.40500557 -0.052159607 0.12392647 -0.10906825 -0.14302701 0.054152243 0.098017134 -0.16765028 -0.002274081 0.20796826 0.069585375 -0.10974622 -0.25902998 0.17591368 -0.16755931 0.02056384 0.55850303 0.033342175 0.12787084 0.06186284 0.12557745 0.21137494 0.057616636 0.013092384 0.17753185 0.44829386 -0.3885914 0.114191115 -0.086731315 -0.22260784 0.24127164 -0.16827261 -0.17973056 -0.0031558005 0.20703186 -0.01153788 0.30383068 -0.26097804 -0.30880043 -0.100989535 -0.145409 -0.1235824 0.05068446 0.22996739 -0.23358408 -0.17743392 0.1102351 0.057922825 0.19008747 -0.1354245 -0.36479563 0.16766284 -0.30718124 0.07940765 0.21532883 0.32517117 0.22860084 -0.042826623 -0.0702002 0.021890424 -0.15323855 -0.13590302 0.2770359 -0.04137093 0.094553225 -0.31431463 -0.017759219 -0.07800118 -0.07076634 -0.12994899 -0.036883973 0.20950705 -0.2416324 -0.151165 -0.20721605 -0.07342885 -0.16455254 0.059487112 -0.0692222 -0.27298334 -0.0065324083 0.08072554 -0.06768218 -0.121256694 0.17274597 -0.14680652 -0.056510627 -0.33411193 -0.05880048 0.06221968 -0.087501585 0.09611575 0.16968518 -0.07699248 -0.12793866 -0.002547592 0.22009252 0.025679842 0.22187935 -0.27524364 -0.056340426 -0.15436935 
-0.13136032 -0.12964293 0.18757927 -0.40909547 -0.3566799 -0.083827436 0.14845836 0.0935213 -0.17494053 -0.38671032 -0.22727993 -0.008779446 0.32893252 -0.32182172 -0.3060885 0.015149704 -0.08586065 0.00032856315 -0.00027674437 -0.21016574 0.23651668 -0.18878073 -0.09872836 0.2470049 -0.03533335 0.12444766 -0.24679434 0.30684286 -0.21857259 -0.058799118 0.15636942 0.15476438\n",
            "\n",
            "to 0.39222196 0.075657636 -0.008611482 -0.27179918 0.30118826 0.25613767 -0.063301384 -0.19365516 0.11646689 0.11812262 0.0112959165 0.043591306 -0.2358264 -0.0043770224 0.16006434 0.024561875 0.19313085 0.19254832 0.106592715 -0.07972283 -0.15060186 0.060296282 0.068974584 0.09113875 0.2087188 -0.2011881 -0.4049762 0.25299516 -0.13955526 -0.36949986 0.39116508 0.46202344 0.061378747 0.2110782 0.37109122 0.15840784 -0.22467658 0.18264681 -0.13371809 -0.14058158 -0.30851078 0.022095114 -0.09131846 -0.0970242 -0.105001085 -0.109173246 -0.26363304 -0.049314782 -0.023216322 -0.20229888 -0.55462027 -0.18832994 -0.05237159 0.17537801 -0.23754966 -0.3547138 0.38661513 0.1302085 -0.18580918 -0.22925684 0.060834408 -0.18071243 -0.059060067 -0.110712625 -0.26885897 -0.02937451 -0.21845084 0.08852282 -0.16485822 -0.0985597 -0.3512196 0.079883486 0.029441714 0.052446835 0.09234531 -0.10296282 0.3345006 0.1099605 -0.023294108 -0.06852321 -0.15161595 0.08442938 0.28936157 0.25353718 0.067490414 -0.09340225 0.045898482 -0.017615221 0.03650456 -0.040954128 0.21322562 0.48587066 -0.30880922 0.15572354 0.053686894 -0.10950658 0.20545879 -0.27712253 -0.13817918 0.16158068 0.17185867 -0.057763726 0.27750468 -0.4077974 -0.28795022 0.08115413 0.10556185 -0.12697564 -0.211487 0.20977932 -0.053795014 -0.15742843 -0.23862222 0.098012 0.21632169 -0.19754586 -0.43050957 0.15560022 -0.4091614 0.07102314 -0.16530412 0.48416537 -0.040532574 -0.04461942 -0.042166974 0.052414224 -0.055350967 -0.26422 0.39641362 -0.0080969855 0.21298201 -0.29477012 -0.15790802 -0.009992674 0.11426201 -0.19844358 0.2217578 0.32562107 -0.16281712 -0.077608466 -0.26329023 0.15239649 -0.010590942 0.26513216 0.050943643 -0.31334126 -0.11132419 0.15649384 0.08244656 -0.08966929 0.23087707 -0.26762366 -0.29656792 -0.3367542 0.017887354 -0.027751327 0.14229311 0.22233728 0.30250007 0.04662931 0.14493866 0.09167783 0.32441136 0.14243734 0.18451697 -0.33027166 0.014822692 0.03994447 -0.054676663 -0.07294519 
0.15723239 -0.36011997 -0.1947941 -0.26190624 0.06750282 -0.15234223 -0.23423846 -0.26357841 -0.10787958 0.07452646 0.18253334 -0.31294435 -0.16478242 0.15047333 0.09480339 -0.041404538 0.393452 -0.12929888 0.16814426 -0.015558209 0.15781528 0.117009334 -0.09999345 0.32110643 -0.23735721 0.22088367 -0.27730986 -0.25288612 0.089038074 0.36214545\n",
            "\n",
            "a 0.0023810673 0.23303464 -0.11486191 0.20809817 0.28598744 0.08076243 0.014905669 0.0018583983 -0.048460767 0.035408534 -0.014609218 -0.14184853 -0.120545045 0.12113197 0.18377715 0.11608329 0.039019622 -0.11896608 -0.0019215643 0.0067745447 -0.05058384 -0.04319941 -0.23051524 0.047814474 0.0057810694 -0.16386034 -0.10287349 0.19966722 -0.014351924 -0.08994669 0.09780039 0.3865875 -0.139379 -0.04170018 0.114855304 0.15545449 -0.23505822 0.3764296 0.15255386 -0.14054677 -0.29809445 -0.009750411 0.075450055 -0.28476378 -0.19776393 0.20149553 -0.24064103 -0.050066475 0.0031124274 -0.019162491 -0.33682913 0.1508399 0.05299875 0.121925786 -0.28980285 -0.020588756 -0.023852222 0.15871564 -0.18545312 -0.20381568 0.01218611 -0.21497172 0.261342 -0.15657935 -0.47635025 -0.11437577 -0.24393311 0.087948 0.0887647 -0.17179365 -0.025392003 -0.06489065 -0.077598944 -0.2550719 -0.10970882 0.1586352 0.09910652 0.033134118 -0.18765706 0.21320665 0.199979 -0.12248948 0.20617564 -0.2644177 0.10104979 -0.014969289 0.1983133 0.062944576 0.1234052 -0.0048145354 0.0021822155 0.27158827 -0.44556168 0.26409662 0.024052182 -0.20513788 0.19811618 -0.16941175 0.034378517 -0.018760197 0.116038814 -0.00354442 0.19285508 -0.38854605 -0.38240418 -0.21566004 -0.2862016 0.0705315 -0.059303682 0.18750215 -0.14636627 -0.03800995 0.074265234 0.19512363 0.2083891 -0.17001641 -0.2821091 0.1636987 -0.27413964 0.038518064 0.22912034 0.2436009 0.08914929 0.21528566 -0.1001769 0.05264885 -0.23805994 -0.224511 0.36786416 -0.1616624 0.19030145 -0.38092536 -0.15666972 0.18827885 -0.109945714 -0.04566904 0.14093338 0.27884144 -0.040825725 0.06393859 -0.10843121 0.093237795 -0.11436574 0.40258485 -0.15163329 -0.21182105 0.1555279 -0.0923882 -0.045850825 -0.37316486 0.24922247 -0.3260423 0.13031285 -0.38550726 -0.11792968 -0.0958035 0.1200344 -0.13492982 0.12852052 -0.060628485 0.18630682 0.039333813 0.3601736 -0.02426114 0.05662573 -0.19310744 -0.004934147 0.018763604 -0.13611993 -0.06603844 
0.24024609 -0.10781483 -0.1900232 -0.4835021 0.38222137 -0.008122437 -0.21854931 -0.3948316 -0.19527663 -0.0816516 0.10666129 -0.3742676 -0.085347965 0.25234511 -0.060093254 0.22343187 -0.058181286 -0.1079882 0.21970448 -0.050010644 -0.033672906 0.10507867 -0.078871325 0.23345824 -0.23106232 0.23395889 -0.4893471 -0.29641494 -0.10930418 0.30982468\n",
            "\n",
            "with 0.22005425 0.18927146 -0.026903247 -0.01163188 0.012548035 0.34841406 -0.1696956 -0.3651929 0.051256243 -0.051426705 -0.110000916 -0.042329106 -0.17952293 -0.024924822 0.040052988 0.29588363 0.04003283 -0.08407121 -0.0883574 -0.07985007 -0.13601732 0.14621133 -0.16565041 -0.028761007 -0.037975457 -0.107525066 -0.15032525 0.3773862 0.026132353 -0.36588156 0.22030224 0.521051 0.015932573 0.017385906 0.18141447 0.31524888 -0.09996447 0.16163106 0.068997905 -0.042522952 -0.19267468 -0.0626558 -0.07820184 -0.31044114 -0.20468806 0.12923938 -0.19570431 0.05877427 0.043309547 0.008893297 -0.41764668 -0.19363572 0.06719537 0.07419441 0.01911896 -0.15203835 0.042201485 -0.0062496555 -0.16810672 -0.26548547 0.21440172 -0.24191928 0.12240057 0.21895799 -0.33301246 -0.115016766 -0.21732312 0.12986922 0.10564297 -0.34316352 -0.25149706 -0.017499078 -0.0013491024 -0.17134076 -0.051353905 0.09938857 0.031169636 0.12771596 -0.03936462 -0.16556604 -0.4002632 0.018668652 0.24073525 -0.009911695 0.06873731 0.009025628 0.07576834 0.060310565 0.06530996 0.016351743 0.21059655 0.5042253 -0.3751217 0.2798365 0.13428973 -0.14533718 -0.10475252 -0.03259947 0.028660167 0.095865615 0.25788364 0.11675115 0.07369735 -0.14345068 -0.28758803 -0.09898265 -0.1892581 -0.19196129 0.011848981 0.3620383 -0.13227993 -0.17627323 -0.005637727 0.0023819723 0.1030426 -0.036095966 -0.2011506 0.28371027 -0.20165402 0.09973652 0.14717644 0.39985457 0.21124592 -0.29511613 -0.021800734 0.35046932 -0.18185736 -0.24812499 0.30346015 0.0055367784 0.08355093 -0.6204914 -0.124209926 -0.23449378 -0.06699107 0.003835819 -0.14737006 -0.024359984 -0.11680866 0.12630875 -0.11408574 -0.062476408 0.0855896 0.20938791 -0.17643939 -0.11370218 -0.1266108 0.16076984 0.028621387 -0.0998132 0.29264966 -0.13594022 -0.13988076 -0.24902464 -0.07234015 0.06162634 -0.1508739 0.047651518 -0.07077015 0.011902544 0.09083959 0.04941821 0.23379284 0.13219343 0.13696471 -0.11255136 -0.23659433 0.18742162 -0.14252824 
-0.2624418 0.2198072 -0.29852006 -0.1767161 -0.46455327 0.3343396 -0.07619489 -0.28153953 -0.14878112 -0.0020993515 0.032093145 0.13966344 -0.3607292 -0.18723999 0.030781649 -0.1419825 0.10331268 0.037079215 -0.21155792 0.2889107 0.000872081 -0.09945414 0.19369422 -0.03861529 0.33115697 -0.28961983 0.18187547 5.9962273e-05 -0.18650688 -0.057644583 0.16642225\n",
            "\n",
            "for 0.13123728 0.19695511 -0.09912366 0.08443279 -0.03233974 0.15573902 -0.41305473 -0.29213238 -0.18629797 0.09687894 -0.008439311 -0.0788675 -0.01572824 0.36991304 0.23210928 -0.15454449 -0.10858204 -0.18053706 0.0019525333 -0.06794005 -0.29074368 0.09940158 -0.28595123 -0.1611768 0.1874804 -0.07369699 -0.17487632 0.20269373 -0.11075059 -0.31380507 0.2129178 0.34727916 -0.0055144257 -0.037445102 0.22963428 0.18287817 -0.18604513 0.26057026 0.034864057 -0.14501642 -0.24034421 -0.10212452 -0.19452849 -0.15416305 -0.15637448 -0.051526938 -0.24637185 -0.08898587 -0.07243025 0.04457472 -0.5369514 -0.12147151 0.049178362 0.012230669 -0.3057832 -0.01872131 -0.0841143 0.16984479 -0.31021285 -0.32267877 -0.08787874 -0.2709059 0.25756192 0.1597915 -0.1712075 -0.07326661 -0.12803034 0.13358562 -0.12966776 0.029782942 -0.2671869 0.183365 0.17195606 -0.29103115 -0.24568833 0.08732569 0.1552956 0.21784988 -0.19092105 0.021557553 -0.2516261 0.07874747 0.26683056 -0.2205759 -0.027597103 0.022475796 0.0852148 0.23262838 0.20965661 -0.12551615 0.31680945 0.45687893 -0.4472075 0.34150937 0.16582789 -0.13046095 -0.1652334 -0.3397129 0.091203034 0.23487592 0.34925207 0.2806296 0.21870875 -0.23180147 -0.16675687 -0.18836568 0.09158003 -0.089358486 -0.12355181 0.25474647 -0.31091833 -0.057946946 0.011541729 0.1071412 -0.08405072 0.050876174 -0.38178116 0.2115883 -0.19808368 0.13568275 0.060665447 0.31850657 0.044123463 -0.08178677 0.051065903 -0.10957955 -0.09991779 -0.4273681 0.3511722 -0.22077128 0.3463177 -0.3017761 -0.03339111 -0.027524471 -0.104288645 -0.32198521 -0.11898229 0.2806056 -0.057215076 -0.085302286 -0.172564 -0.070749395 -0.07453917 0.056348067 -0.34822553 -0.19760768 0.17571865 0.20413892 -0.106884465 0.018021736 0.17161433 -0.051248014 -0.033265863 -0.42254516 -0.0041944236 -0.1297536 0.23490092 -0.038576018 0.22991857 -0.14864402 0.04051875 0.086792365 0.18244076 0.061563782 -0.05491536 -0.14732456 0.071167514 -0.015210932 -0.058998767 -0.06181866 
0.23162065 -0.17053631 -0.17824925 -0.32257876 0.18943812 -0.017151177 -0.16670802 -0.17270514 -0.27506405 0.11992814 0.017713124 -0.43250933 -0.33928704 0.028149331 -0.14577262 0.19246404 0.087792225 -0.14441071 0.11441351 -0.21999238 0.06793984 0.06380469 -0.15739973 0.29522815 -0.16798751 0.21418858 -0.23291309 -0.09812323 0.14653187 0.2059044\n",
            "\n",
            "was 0.014052485 0.2289207 0.116465926 -0.3134549 0.17988834 0.34829095 0.4158189 -0.4118456 0.07679727 0.058249813 -0.07494656 -0.14733139 -0.30183014 0.17461757 0.20188265 0.09410064 0.1774688 0.23853073 0.06294002 -0.23518643 -0.059300236 -0.20227392 -0.044103384 -0.16386352 0.22317766 -0.26223913 -0.17225198 0.55221397 -0.09918676 -0.33741418 0.1500311 0.20279287 0.24919479 0.3306684 -0.0076914174 0.32582423 -0.14141355 0.0932414 -0.14170012 -0.116675094 -0.16399534 0.17619231 -0.16136742 -0.41171354 0.12476867 0.27881828 -0.39771885 0.16336338 0.005572387 -0.40842962 -0.05617737 -0.0460771 0.19284895 0.15999253 -0.2251041 -0.2673534 -0.08214736 -0.15322368 -0.15562347 0.04046846 0.19185494 -0.12635152 0.039844837 0.15594175 -0.18025544 -0.25328842 -0.17391421 0.047368962 0.04489535 -0.32872003 -0.24146092 0.075519554 0.24526517 -0.2138233 -0.12379112 0.1441331 -0.023121068 -0.0064152735 -0.24778552 -0.18610902 0.05658372 -0.16968097 0.22137535 -0.10634978 -0.13875398 -0.16264465 0.22005358 0.07660276 0.19576274 -0.13919286 -0.024266621 0.43372828 -0.29944897 0.014672518 0.14136353 -0.122159116 -0.23074558 0.0014995805 0.08222413 0.10103864 0.2993359 0.07758466 0.23371722 -0.10953543 -0.6168602 -0.23065285 -0.0025122848 -0.22479759 -0.20930292 0.11262761 -0.29352194 -0.0005557836 0.24201894 0.30671147 0.10007267 -0.09245091 -0.5499479 -0.04360807 -0.23313715 0.07587397 0.11511905 0.43678924 -0.0067920345 0.0783242 0.06788976 0.02830105 -0.004570396 -0.11064432 0.54493743 0.03537059 0.42052647 -0.4400107 0.044326134 0.01951296 0.034523554 -0.1943648 0.2710953 0.1652812 -0.65297264 -0.18108234 0.02674074 0.16964999 -0.13371322 0.2648287 -0.05822907 0.11698701 -0.024320802 -0.20939443 -0.3607151 -0.21477224 0.19222759 -0.39731783 -0.3171196 0.0032270807 -0.22471997 -0.11018496 0.028464062 -0.023082737 0.22868907 0.052735467 -0.12805094 -0.14400569 0.14288498 -0.25838515 0.27403793 -0.41298863 -0.21413414 -0.1383318 -0.31998533 -0.11995595 -0.18300451 
-0.23442817 -0.15472601 -0.16854559 0.30037564 0.2764153 -0.31358227 -0.30845013 0.30671883 -0.04748038 0.1263355 -0.48926327 -0.18327112 0.13719036 -0.027909892 -0.28435373 0.017228413 -0.10711871 0.44948143 -0.0722058 -0.13867849 0.20835352 -0.12055206 0.19474985 -0.055499464 0.27399293 -0.15577237 -0.3142988 -0.09180639 0.3395141\n",
            "\n"
          ]
        }
      ],
      "source": [
        "counter = 0\n",
        "limit = 10\n",
        "\n",
        "with open(word2vec_path_txt) as f:\n",
        "    for line in f:\n",
        "        if counter == limit:\n",
        "            break\n",
        "        else:\n",
        "            print(line)\n",
        "            counter += 1"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "kY5-JqGvTJ8v"
      },
      "source": [
        "## Building a word-to-vector dictionary from the embeddings file and saving it with pickle"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {},
        "colab_type": "code",
        "id": "WjbQ0-rGU29B"
      },
      "outputs": [],
      "source": [
        "embedding_dict = {}     # Stores word-embeddings pair\n",
        "\n",
        "with open(word2vec_path_txt) as f:\n",
        "    next(f)                                  # Discarding the first line\n",
        "    for line in f:\n",
        "        temp_line = line.split()             # Separating word and vector\n",
        "        word = temp_line[0]\n",
        "        if word.isdigit() == False:          # Skipping the keys which are pure numbers\n",
        "            embed_vec = np.asarray(temp_line[1:], dtype=np.float32) # Converting vector into NumPy array\n",
        "            embedding_dict[word] = embed_vec # Inserting into the dictionary\n",
        "\n",
        "pickle.dump(embedding_dict, open(word2vec_path_dictionary,'wb'))    # Saving the dictionary using Pickle"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "QrF5156P_8BO"
      },
      "source": [
        "## Writing a random vector (a shuffled copy of an existing embedding) to a file"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {},
        "colab_type": "code",
        "id": "oPuDhbx9_vSI"
      },
      "outputs": [],
      "source": [
        "with open(word2vec_path_txt) as f:\n",
        "    next(f, None)                            # Discarding the first line (no error if the file is empty)\n",
        "    for line in f:\n",
        "        temp_line = line.split()             # Separating word and vector\n",
        "        if not temp_line:                    # Skipping blank lines instead of failing on temp_line[0]\n",
        "            continue\n",
        "        word = temp_line[0]\n",
        "        if word == '66336':                  # Entry whose vector components are reused\n",
        "            temp_line = temp_line[1:]        # Keeping only the vector components (still strings)\n",
        "            # Shuffling in place. NOTE: no seed is set in this cell, so unless one is set\n",
        "            # earlier in the notebook the resulting order differs between runs.\n",
        "            random.shuffle(temp_line)\n",
        "            # Saving the shuffled vector; the with-block closes (and flushes) the file deterministically\n",
        "            with open(word2vec_random_vec_path, 'wb') as vec_file:\n",
        "                pickle.dump(temp_line, vec_file)\n",
        "            break"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "GL0l3IGSmSKQ"
      },
      "source": [
        "## Testing Random Vector"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 55
        },
        "colab_type": "code",
        "id": "l7Pl3IkhmU-e",
        "outputId": "d233a5c2-f050-4d5e-b06b-18d062adea5c"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "['-0.06202219', '0.49705315', '-0.36654922', '-0.43995985', '0.30156144', '0.06809836', '-0.46773717', '0.06931866', '-0.32421592', '0.44589454', '-0.90800893', '-0.20277438', '-0.0402272', '-0.44873247', '0.0014344573', '-0.04454809', '-0.30091795', '0.020562021', '0.10120315', '0.16264242', '-0.020901222', '-0.08942807', '-0.5563926', '0.14661679', '0.14776026', '-0.34733492', '-0.6683234', '0.26168486', '0.3407398', '-0.3354475', '-0.1255758', '-0.02666929', '0.0918361', '0.5009282', '-0.45792708', '-0.04439764', '-0.5499033', '-0.113795616', '1.1885228', '-0.058911942', '-0.41393474', '0.33377483', '0.27774987', '-0.57757825', '0.1551013', '-0.5133394', '-0.044840828', '0.17051798', '0.5772125', '0.5979373', '0.3057155', '-0.58410317', '-0.49211395', '0.24493715', '0.030653883', '-0.13294867', '-0.4728327', '-0.3658061', '-0.13136336', '0.84137726', '0.35759398', '-0.39076868', '-0.47601986', '-0.15807828', '-0.32230842', '0.5736673', '0.10251016', '-0.22305535', '0.6019472', '0.4420645', '-0.46943384', '-0.29570258', '0.22330889', '-0.08749167', '0.68458676', '0.120522656', '0.41199297', '-0.29102385', '0.13900696', '0.30740988', '-0.20342875', '-0.15324421', '-0.09903324', '-0.5130624', '0.81314605', '-0.31369278', '0.21130149', '-0.20777808', '0.4112', '-0.10096193', '0.03762949', '-0.53771573', '0.087632895', '-0.46239078', '0.012757956', '0.06246767', '-0.5669881', '-0.5315525', '-0.48883438', '0.083203696', '0.38436732', '0.044447124', '0.1402283', '-0.41875878', '0.016406676', '-0.23719022', '-0.15593818', '-0.100129984', '-0.09299469', '0.10641771', '-0.6308218', '-0.044871315', '-0.2505729', '0.3566824', '0.5091699', '-0.32189533', '-0.34231603', '0.6467254', '0.16248347', '0.47121587', '-0.102524154', '-0.09732677', '-0.56183124', '-0.14052574', '0.011468398', '-0.028652608', '0.043500368', '-0.28032795', '-0.2645208', '-0.22370586', '-0.4220163', '0.07244505', '-0.3648586', '-0.08864754', '-0.12027645', '0.13662106', '0.45457038', 
'0.550946', '0.40306813', '-0.16677737', '-0.18171692', '0.23511265', '0.005288494', '0.32010877', '-0.1576755', '0.24805954', '-0.2032046', '-0.18395862', '-0.27307007', '0.2474421', '-0.20866701', '0.84966844', '0.44931448', '-0.11688226', '0.4526595', '0.83472836', '-0.07151044', '-0.20185058', '-0.15870114', '0.27528426', '-0.608415', '-0.01909444', '-0.15704517', '0.19005245', '0.14632659', '-0.3669248', '-0.1399837', '-0.6684023', '0.019115085', '0.056331117', '-0.6122163', '-0.3967059', '-0.008734393', '0.33832374', '-0.37785307', '-0.2154151', '0.58528256', '-0.94987905', '-0.11023157', '0.3746075', '-0.039364513', '-0.37785843', '0.7483953', '0.20664652', '0.11635872', '-0.23331513', '0.020975228', '-0.4561034', '0.20162527', '0.035976063', '-0.2404566', '-0.5764577', '-0.07428007', '-0.30917987', '-0.052297395', '-0.15342261', '0.1807168', '-0.11934112', '-0.3848494', '-0.65616834']\n"
          ]
        }
      ],
      "source": [
        "# Loading the pickled vector back; the with-block closes the file handle after reading\n",
        "with open(word2vec_random_vec_path, 'rb') as vec_file:\n",
        "    rand_vec = pickle.load(vec_file)\n",
        "print(rand_vec)     # Printing the loaded object as-is (no copy needed just to display it)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 641
        },
        "colab_type": "code",
        "id": "Qmx6B9Heni5e",
        "outputId": "34946ad5-0f48-4ebc-adf5-9507f3406fc0"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[-0.06202219  0.49705315 -0.36654922 -0.43995985  0.30156144  0.06809836\n",
            " -0.46773717  0.06931866 -0.32421592  0.44589454 -0.90800893 -0.20277438\n",
            " -0.0402272  -0.44873247  0.00143446 -0.04454809 -0.30091795  0.02056202\n",
            "  0.10120315  0.16264242 -0.02090122 -0.08942807 -0.5563926   0.14661679\n",
            "  0.14776026 -0.34733492 -0.6683234   0.26168486  0.3407398  -0.3354475\n",
            " -0.1255758  -0.02666929  0.0918361   0.5009282  -0.45792708 -0.04439764\n",
            " -0.5499033  -0.11379562  1.1885228  -0.05891194 -0.41393474  0.33377483\n",
            "  0.27774987 -0.57757825  0.1551013  -0.5133394  -0.04484083  0.17051798\n",
            "  0.5772125   0.5979373   0.3057155  -0.58410317 -0.49211395  0.24493715\n",
            "  0.03065388 -0.13294867 -0.4728327  -0.3658061  -0.13136336  0.84137726\n",
            "  0.35759398 -0.39076868 -0.47601986 -0.15807828 -0.32230842  0.5736673\n",
            "  0.10251016 -0.22305535  0.6019472   0.4420645  -0.46943384 -0.29570258\n",
            "  0.22330889 -0.08749167  0.68458676  0.12052266  0.41199297 -0.29102385\n",
            "  0.13900696  0.30740988 -0.20342875 -0.15324421 -0.09903324 -0.5130624\n",
            "  0.81314605 -0.31369278  0.21130149 -0.20777808  0.4112     -0.10096193\n",
            "  0.03762949 -0.53771573  0.08763289 -0.46239078  0.01275796  0.06246767\n",
            " -0.5669881  -0.5315525  -0.48883438  0.0832037   0.38436732  0.04444712\n",
            "  0.1402283  -0.41875878  0.01640668 -0.23719022 -0.15593818 -0.10012998\n",
            " -0.09299469  0.10641771 -0.6308218  -0.04487132 -0.2505729   0.3566824\n",
            "  0.5091699  -0.32189533 -0.34231603  0.6467254   0.16248347  0.47121587\n",
            " -0.10252415 -0.09732677 -0.56183124 -0.14052574  0.0114684  -0.02865261\n",
            "  0.04350037 -0.28032795 -0.2645208  -0.22370586 -0.4220163   0.07244505\n",
            " -0.3648586  -0.08864754 -0.12027645  0.13662106  0.45457038  0.550946\n",
            "  0.40306813 -0.16677737 -0.18171692  0.23511265  0.00528849  0.32010877\n",
            " -0.1576755   0.24805954 -0.2032046  -0.18395862 -0.27307007  0.2474421\n",
            " -0.20866701  0.84966844  0.44931448 -0.11688226  0.4526595   0.83472836\n",
            " -0.07151044 -0.20185058 -0.15870114  0.27528426 -0.608415   -0.01909444\n",
            " -0.15704517  0.19005245  0.14632659 -0.3669248  -0.1399837  -0.6684023\n",
            "  0.01911508  0.05633112 -0.6122163  -0.3967059  -0.00873439  0.33832374\n",
            " -0.37785307 -0.2154151   0.58528256 -0.94987905 -0.11023157  0.3746075\n",
            " -0.03936451 -0.37785843  0.7483953   0.20664652  0.11635872 -0.23331513\n",
            "  0.02097523 -0.4561034   0.20162527  0.03597606 -0.2404566  -0.5764577\n",
            " -0.07428007 -0.30917987 -0.05229739 -0.15342261  0.1807168  -0.11934112\n",
            " -0.3848494  -0.65616834]\n"
          ]
        }
      ],
      "source": [
        "# Converting the loaded components into a float32 NumPy array and displaying it\n",
        "r = np.array(rand_vec, dtype=np.float32)\n",
        "print(r)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "Z5EBOXm3YSE7"
      },
      "source": [
        "## Importing the word2vec dictionary and testing it"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 954
        },
        "colab_type": "code",
        "id": "8Az_BeaAYYvx",
        "outputId": "7bc5ae16-4986-4aa1-d6c9-de477610348d"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Vector: [ 4.24427539e-01  2.85514832e-01 -3.11622024e-01 -1.32121697e-01\n",
            "  2.73986846e-01  2.17652440e-01 -1.50872663e-01 -1.41570359e-01\n",
            "  7.89194703e-02  5.73428690e-01  1.86945692e-01 -4.42874953e-02\n",
            " -1.65039986e-01  2.26775661e-01  5.18160045e-01 -1.28099829e-01\n",
            " -2.66403615e-01  2.39056930e-01 -1.64566576e-01 -2.05159038e-01\n",
            "  2.21554306e-03 -2.66948342e-01  7.30192140e-02 -7.65029266e-02\n",
            "  1.08069591e-01 -2.51822472e-01 -1.19432032e-01  3.42785418e-01\n",
            "  1.80453602e-02  3.84880975e-02  1.22721367e-01  4.11244482e-01\n",
            "  1.88945293e-01  4.54071879e-01  9.38699692e-02  3.35199863e-01\n",
            " -2.07605273e-01  6.46782041e-01  3.92761547e-03  1.06752619e-01\n",
            " -2.06900164e-01 -8.00306164e-03 -1.45829603e-01  1.84589073e-01\n",
            " -4.33346592e-02  5.00631854e-02  4.97310376e-03 -2.26222664e-01\n",
            " -8.32117647e-02  3.89308631e-01 -1.13335550e-01 -4.29326028e-01\n",
            "  5.25964558e-01  1.57237262e-01 -5.26877642e-01 -2.06921771e-01\n",
            "  2.54119724e-01 -2.62810141e-01  4.10198420e-02 -1.90848693e-01\n",
            " -2.08571911e-01 -1.67162001e-01  8.19580480e-02  3.60590309e-01\n",
            " -3.16385180e-01  1.76296771e-01 -1.20085463e-01  1.53609216e-01\n",
            "  1.72120899e-01 -2.04515427e-01 -2.53260076e-01  1.64396405e-01\n",
            "  3.50658715e-01 -1.69207305e-01 -6.25559017e-02  2.84702837e-01\n",
            " -3.23076695e-02 -1.02616213e-01 -1.55196235e-01  7.19997585e-02\n",
            " -1.74826086e-01 -1.98527217e-01 -3.09444427e-01 -3.17376763e-01\n",
            "  2.00140476e-01 -3.71931762e-01  1.21485628e-01 -9.93213356e-02\n",
            "  1.91478208e-01  5.77942953e-02  2.25791678e-01  5.48116803e-01\n",
            " -3.05028647e-01  4.06201184e-01  4.01925743e-01  1.50316209e-01\n",
            "  5.98512232e-01 -1.25639513e-02  5.13553508e-02 -1.26702175e-01\n",
            "  3.88815016e-01 -7.60614499e-02  1.23914346e-01  4.66492027e-01\n",
            " -2.10618809e-01  2.58216590e-01  3.70904543e-02  4.48366046e-01\n",
            " -1.01617448e-01  5.41833878e-01 -1.85380161e-01 -4.27940398e-01\n",
            "  1.95838302e-01  2.43061066e-01  6.95556551e-02  5.30793741e-02\n",
            "  4.05757785e-01  1.69887051e-01 -1.66440532e-01 -1.13970518e-01\n",
            " -1.37826145e-01  1.55042365e-01 -2.83741117e-01 -1.32790118e-01\n",
            "  2.81213999e-01 -5.07387102e-01 -1.32747754e-01 -1.09551229e-01\n",
            " -3.62257793e-04 -3.72484654e-01  5.34646809e-01  4.71991263e-02\n",
            "  1.20899692e-01 -5.69930494e-01  1.02326542e-01 -3.15892935e-01\n",
            "  4.75350797e-01  3.12345952e-01 -3.34250003e-01 -3.64454314e-02\n",
            "  1.01569280e-01 -7.30682462e-02 -4.34069484e-01  2.06657290e-01\n",
            " -2.96378165e-01 -1.49641439e-01  2.99973518e-01  1.21395335e-01\n",
            " -1.23879880e-01 -1.36004806e-01 -3.94222796e-01 -2.87730008e-01\n",
            " -4.45824265e-01  3.88584509e-02  1.15298450e-01 -2.39803027e-02\n",
            "  1.13722861e-01 -2.63144583e-01  1.89594939e-01 -1.88183084e-01\n",
            " -2.97673251e-02  6.13859117e-01  1.51209891e-01  2.75130868e-01\n",
            " -8.24735090e-02 -1.49875835e-01 -1.27023920e-01 -2.64450967e-01\n",
            " -4.06141698e-01  8.17925110e-02  2.32096642e-01 -1.68902606e-01\n",
            " -2.41545513e-01  1.02268837e-01  1.47850737e-01  2.10308507e-02\n",
            " -5.63572705e-01 -1.44536823e-01 -2.59835690e-01 -1.41913533e-01\n",
            " -1.20196573e-01 -5.56254864e-01 -2.88905025e-01  2.94693738e-01\n",
            "  3.12818199e-01  4.21376020e-01  2.50915885e-01  6.42729178e-02\n",
            "  2.06329405e-01 -2.28851572e-01 -8.03384483e-02  7.57951364e-02\n",
            " -1.26411393e-01  1.55616909e-01 -5.54103702e-02  2.40473568e-01\n",
            " -3.99701267e-01  1.66336626e-01 -8.77441559e-03  1.45007536e-01] \n",
            "Vector Shape: (200,)\n"
          ]
        }
      ],
      "source": [
        "# Loading word2vec dictionary using Pickle; the with-block closes the file handle after reading\n",
        "with open(word2vec_path_dictionary, 'rb') as dict_file:\n",
        "    word2vec_dict = pickle.load(dict_file)\n",
        "sample_vector = word2vec_dict['threshold']      # Querying the vector corresponding to the word 'threshold'\n",
        "print(\"Vector:\", sample_vector, \"\\nVector Shape:\", sample_vector.shape)"
      ]
    }
  ]
}