[349d16]: / code / data_preprocessing / embed_words.py

Download this file

19 lines (15 with data), 665 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
import numpy as np
from gensim.models.keyedvectors import KeyedVectors
# Paths to the word2vec embedding files: the binary model and its plain-text
# counterpart (presumably biomedical embeddings, judging by the file names).
word2vec_path_bin = '/media/ramkabir/PC Data/ASU Data/Semester 3/BMNLP/Projects/Medical Data/embeddings/bio_embedding_extrinsic.bin'
word2vec_path_txt = '/media/ramkabir/PC Data/ASU Data/Semester 3/BMNLP/Projects/Medical Data/embeddings/bio_embedding_extrinsic.txt'

# `counter` tracks how many lines have been printed; `limit` caps the preview.
counter = 0
limit = 10

# Disabled conversion step: loads the binary model and re-saves it in the
# word2vec text format at `word2vec_path_txt`. Appears to be a one-time step
# that created the text file read below -- re-enable if that file is missing.
# model = KeyedVectors.load_word2vec_format(word2vec_path_bin, binary=True)
# model.save_word2vec_format(word2vec_path_txt, binary=False)

# Preview the first `limit` lines of the text-format embeddings.
# The encoding is given explicitly because gensim writes the word2vec text
# format as UTF-8; relying on the platform default can fail or mis-decode
# tokens on systems whose locale encoding is not UTF-8.
with open(word2vec_path_txt, encoding='utf-8') as f:
    for line in f:
        if counter == limit:
            break
        # `line` still carries its trailing newline, so each printed line is
        # followed by a blank line (kept as in the original output).
        print(line)
        counter += 1