--- a +++ b/exseek/scripts/table_converter.py @@ -0,0 +1,109 @@ +#! /usr/bin/env python + +import sys +import argparse +import os +import pandas as pd + +def open_file_or_stdout(filename): + if filename == '-': + return sys.stdout + else: + return open(filename, 'w') + +def open_file_or_stdin(filename): + if filename == '-': + return sys.stdin + else: + return open(filename, 'r') + +formats = ('table', 'csv', 'json', 'html', 'excel', 'hdf', 'sql', 'pickle') + +def detect_format(filename): + ext = os.path.splitext(filename)[1] + if not ext: + return + format = { + '.txt': 'table', + '.csv': 'csv', + '.json': 'json', + '.xls': 'excel', + '.xlsx': 'excel', + '.h5': 'hdf', + '.hdf5': 'hdf', + '.hdf': 'hdf', + '.sql': 'sql', + '.pkl': 'pickle', + '.pickle': 'pickle' + }.get(ext) + if format is None: + raise ValueError('unknown file extension: {}'.format(ext)) + return format + +if __name__ == '__main__': + parser = argparse.ArgumentParser('Convert table formats') + parser.add_argument('--input-file', '-i', type=str, default='-') + parser.add_argument('--output-file', '-o', type=str, default='-') + parser.add_argument('--sformat', '-s', type=str, + choices=formats, help='input format') + parser.add_argument('--dformat', '-d', type=str, + choices=formats, help='output format') + parser.add_argument('--reader-args', '-r', type=str, action='append') + parser.add_argument('--writer-args', '-w', type=str, action='append') + args = parser.parse_args() + + reader_args = {} + if args.reader_args: + for arg in args.reader_args: + c = arg.split('=') + if len(c) != 2: + raise ValueError('reader args should be specified as key=value') + + sformat = detect_format(args.input_file) + if not sformat: + sformat = args.sformat + if not sformat: + raise ValueError('cannot detect format from input filename and --sformat is not specified') + + import pandas as pd + read_df = { + 'table': pd.read_table, + 'csv': pd.read_csv, + 'json': pd.read_json, + 'html': pd.read_html, + 'excel': pd.read_excel, + 'hdf': pd.read_hdf, + 'sql': pd.read_sql, + 'pickle': pd.read_pickle + }[sformat] + + with open_file_or_stdin(args.input_file) as f: + df = read_df(f, **reader_args) + + dformat = detect_format(args.output_file) + if not dformat: + dformat = args.dformat + if not dformat: + raise ValueError('cannot detect format from output filename and --dformat is not specified') + + write_df = { + 'table': df.to_csv, + 'csv': df.to_csv, + 'json': df.to_json, + 'html': df.to_html, + 'excel': df.to_excel, + 'hdf': df.to_hdf, + 'sql': df.to_sql, + 'pickle': df.to_pickle + }[dformat] + + writer_args = {} + if args.writer_args: + for arg in args.writer_args: + c = arg.split('=') + if len(c) != 2: + raise ValueError('writer args should be specified as key=value') + writer_args[c[0]] = c[1] + + with open_file_or_stdout(args.output_file) as f: + write_df(f, **writer_args)