[71ad2f]: / src / cnn / cnn_utils.py

Download this file

27 lines (24 with data), 788 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import numpy as np
def character_index(sentence, vocabulary, sequence_length=500):
    """Encode *sentence* as a fixed-length list of vocabulary indices.

    Each of the first ``sequence_length`` symbols of ``sentence`` is mapped
    to its 1-based position in ``vocabulary``.  Symbols that are not found
    in ``vocabulary`` map to ``len(vocabulary) + 1``.  If fewer than
    ``sequence_length`` symbols are available, the result is right-padded
    with ``0``.

    Args:
        sentence: Sliceable sequence of symbols (e.g. a ``str``).
        vocabulary: Sequence providing ``index()`` and ``len()`` (e.g. a
            ``str`` or a ``list`` of characters).
        sequence_length: Length of the returned list.  Values below zero
            behave like ``0`` and yield an empty list.

    Returns:
        A list of ``sequence_length`` integers.
    """
    unknown_index = len(vocabulary) + 1
    # Clamp so that a negative length cannot turn the slice below into
    # "everything except the last k symbols".
    limit = max(sequence_length, 0)

    # Remember the index of every distinct symbol so the vocabulary is
    # scanned at most once per symbol instead of twice per occurrence.
    resolved = {}
    index_list = []
    for symbol in sentence[:limit]:
        if symbol not in resolved:
            try:
                # index() returns the first match, shifted so 0 stays free
                # for padding.
                resolved[symbol] = vocabulary.index(symbol) + 1
            except ValueError:
                resolved[symbol] = unknown_index
        index_list.append(resolved[symbol])

    # Right-pad with zeros up to the requested length (no-op when full).
    index_list.extend([0] * (limit - len(index_list)))
    return index_list
def character_embedding(index_list, vocabulary):
    """Build a one-hot matrix with one column per entry of *index_list*.

    The matrix has ``len(vocabulary) + 1`` rows.  A non-zero index ``i``
    sets row ``i - 1`` of its column to ``1``; an index of ``0`` leaves the
    column all zeros.  The index ``len(vocabulary) + 1`` therefore lands in
    the last row.

    Args:
        index_list: Iterable of integer indices.
        vocabulary: Sized collection; only its length is used.

    Returns:
        A ``float32`` NumPy array of shape
        ``(len(vocabulary) + 1, len(index_list))``.  An empty
        ``index_list`` yields a matrix with zero columns.

    Raises:
        IndexError: If an index points outside the available rows.
    """
    # Materialise once so that any iterable (not only sized ones) works.
    indices = list(index_list)
    row_count = len(vocabulary) + 1

    # Allocate the final matrix up front and fill it column by column,
    # rather than building one array per position and transposing a copy.
    embedding = np.zeros((row_count, len(indices)), dtype='float32')
    for column, char_index in enumerate(indices):
        if char_index != 0:
            embedding[char_index - 1, column] = 1
    return embedding