dosma/utils/io_utils.py

import logging
import os
import pickle
import warnings
from typing import Sequence

import h5py
import pandas as pd

from dosma.utils.logger import setup_logger

__all__ = ["mkdirs", "save_pik", "load_pik", "save_h5", "load_h5", "save_tables"]


def mkdirs(dir_path: str):
    """Make directory if directory does not exist.

    Args:
        dir_path (str): Directory path to make.

    Returns:
        str: Path to the directory.
    """
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)
    return dir_path


def save_pik(file_path: str, data):
    """Save data using `pickle`.

    Pickle may not be stable across Python 2/3.

    Args:
        file_path (str): File path to save to.
        data (Any): Data to serialize.
    """
    mkdirs(os.path.dirname(file_path))
    with open(file_path, "wb") as f:
        pickle.dump(data, f)


def load_pik(file_path: str):
    """Load data using `pickle`.

    Should be used with :any:`save_pik`.
    Pickle may not be stable across Python 2/3.

    Args:
        file_path (str): File path to load from.

    Returns:
        Any: Loaded data.

    Raises:
        FileNotFoundError: If `file_path` does not exist.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError("{} does not exist".format(file_path))
    with open(file_path, "rb") as f:
        return pickle.load(f)
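
# Example (illustrative sketch, not part of the original module): round-tripping a
# dictionary through save_pik / load_pik. The path and values below are hypothetical.
#
#   data = {"scan_id": "001", "echo_times": [10.0, 20.0, 30.0]}
#   save_pik("/tmp/example/data.pik", data)
#   assert load_pik("/tmp/example/data.pik") == data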


def save_h5(file_path: str, data_dict: dict):
    """Save data in HDF5 format.

    Args:
        file_path (str): File path to save to.
        data_dict (dict): Dictionary of data to store. Dictionary can only have depth of 1.
    """
    mkdirs(os.path.dirname(file_path))
    with h5py.File(file_path, "w") as f:
        for key in data_dict.keys():
            f.create_dataset(key, data=data_dict[key])


def load_h5(file_path):
    """Load data in HDF5 format.

    Args:
        file_path (str): File path to load from.

    Returns:
        dict: Loaded data.

    Raises:
        FileNotFoundError: If `file_path` does not exist.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError("{} does not exist".format(file_path))
    data = {}
    with h5py.File(file_path, "r") as f:
        for key in f.keys():
            # Read the full dataset into memory (`Dataset.value` was removed in h5py 3.0).
            data[key] = f[key][()]
    return data
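
# Example (illustrative sketch, not part of the original module): saving and reloading a
# flat dictionary of arrays with save_h5 / load_h5. The path and arrays are hypothetical.
#
#   import numpy as np
#   maps = {"t2": np.random.rand(64, 64), "mask": np.ones((64, 64), dtype=np.uint8)}
#   save_h5("/tmp/example/maps.h5", maps)
#   loaded = load_h5("/tmp/example/maps.h5")
#   assert np.allclose(loaded["t2"], maps["t2"])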


def save_tables(
    file_path: str, data_frames: Sequence[pd.DataFrame], sheet_names: Sequence[str] = None
):
    """Save data in Excel tables.

    Args:
        file_path (str): File path to Excel file.
        data_frames (Sequence[pd.DataFrame]): Tables to store to Excel file.
            One table stored per sheet.
        sheet_names (:obj:`Sequence[str]`, optional): Sheet names for each data frame.
    """
    mkdirs(os.path.dirname(file_path))
    if sheet_names is None:
        sheet_names = ["Sheet%d" % (i + 1) for i in range(len(data_frames))]
    if len(data_frames) != len(sheet_names):
        raise ValueError("Number of data_frames and sheet_names should be the same")
    # `ExcelWriter.save()` was removed in pandas 2.0; the context manager writes
    # and closes the file on exit.
    with pd.ExcelWriter(file_path) as writer:
        for df, sheet_name in zip(data_frames, sheet_names):
            df.to_excel(writer, sheet_name=sheet_name, index=False)
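
# Example (illustrative sketch, not part of the original module): writing two DataFrames
# to one Excel workbook with save_tables. The path, columns, and values are hypothetical.
#
#   df_t2 = pd.DataFrame({"region": ["medial", "lateral"], "mean_t2_ms": [34.2, 31.8]})
#   df_t1rho = pd.DataFrame({"region": ["medial", "lateral"], "mean_t1rho_ms": [41.0, 39.5]})
#   save_tables("/tmp/example/results.xlsx", [df_t2, df_t1rho], ["T2", "T1rho"])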


def init_logger(log_file: str, debug: bool = False):  # pragma: no cover
    """Initialize logger.

    Args:
        log_file (str): File path to log file.
        debug (:obj:`bool`, optional): If ``True``, debug mode will be enabled for the logger.
            This means debug statements will also be written to the log file.
            Defaults to ``False``.
    """
    warnings.warn(
        "init_logger is deprecated since v0.0.14 and will no longer be "
        "supported in v0.13. Use `dosma.setup_logger` instead.",
        DeprecationWarning,
    )
    level = logging.DEBUG if debug else logging.INFO
    setup_logger(log_file, level=level)
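
# Example (illustrative sketch, not part of the original module): the non-deprecated path
# is to call setup_logger directly, as init_logger does above. The log path is hypothetical.
#
#   from dosma.utils.logger import setup_logger
#   setup_logger("/tmp/example/dosma.log", level=logging.DEBUG)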