[8c4e02]: / scripts / generate_libri_csv.py

Download this file

31 lines (23 with data), 1.2 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import os
import sys
def _sorted_subdirs(path):
    """Return the names of the directories directly under *path*, sorted."""
    return sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )


def main(libri_root, out_file, subsets=('train-clean-100', 'train-clean-360')):
    """Write a tab-separated index of LibriSpeech transcripts.

    Walks ``<libri_root>/<subset>/<speaker>/<section>/`` and reads the
    ``<speaker>-<section>.trans.txt`` file found in every section directory.
    Each transcript line has the form ``<id> <text>`` and produces one output
    line::

        libri_<id> TAB <subset>/<speaker>/<section>/<id>.wav TAB <text lower-cased>

    The audio path is relative to ``libri_root`` and always uses ``/`` as the
    separator. The function does not check that the ``.wav`` file exists.

    Args:
        libri_root: Directory that contains the subset directories.
        out_file: Path of the file to create (overwritten if it exists).
        subsets: Collection (tuple, list, set, ...) of subset directory names
            to include. Names that are not present under ``libri_root`` are
            skipped silently.

    Raises:
        IOError/OSError: If ``libri_root`` cannot be listed, ``out_file``
            cannot be written, or a section directory has no transcript file.
        ValueError: If a non-blank transcript line has no space separating
            the utterance id from the text.
    """
    # `with` guarantees the output is flushed and closed even when a
    # transcript file is missing or malformed half-way through the walk.
    with open(out_file, 'w') as out:
        # Directories are visited in sorted order so that the output is
        # reproducible; os.listdir() order is arbitrary.
        for top_dir in _sorted_subdirs(libri_root):
            if top_dir not in subsets:
                continue
            subset_dir = os.path.join(libri_root, top_dir)
            for speaker in _sorted_subdirs(subset_dir):
                speaker_dir = os.path.join(subset_dir, speaker)
                for section in _sorted_subdirs(speaker_dir):
                    trans_file = os.path.join(
                        speaker_dir, section,
                        speaker + '-' + section + '.trans.txt')
                    with open(trans_file, 'r') as t:
                        for line in t:
                            # Strip only the line terminator; slicing with
                            # [:-1] would eat a real character when the last
                            # line of the file has no trailing newline.
                            line = line.rstrip('\r\n')
                            if not line.strip():
                                continue  # tolerate blank lines
                            parts = line.split(' ', 1)
                            if len(parts) != 2:
                                raise ValueError(
                                    'malformed transcript line in %s: %r'
                                    % (trans_file, line))
                            id_, transcript = parts
                            # Built with '/' on purpose: this string is data
                            # written to the index, not a local filesystem
                            # path.
                            audio_file_path = '/'.join(
                                (top_dir, speaker, section, id_ + '.wav'))
                            out.write('\t'.join(
                                ('libri_' + id_,
                                 audio_file_path,
                                 transcript.lower())) + '\n')
if __name__ == '__main__':
    # Expects two command-line arguments:
    #   1. root path of the LibriSpeech corpus
    #   2. path of the (tab-separated) output file to write
    # Any further arguments are ignored.
    if len(sys.argv) < 3:
        # Exit with a usage message on stderr instead of an IndexError
        # traceback when an argument is missing.
        sys.exit('usage: %s LIBRI_ROOT OUT_CSV' % sys.argv[0])
    main(sys.argv[1], sys.argv[2])