[b4a150]: / ReadersWriters / _CsvFile.py

Download this file

267 lines (248 with data), 10.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
# Copyright 2017 University of Westminster. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
""" It is an interface for reading and writing Comma Separated Values (CSV) files.
"""
import csv
import logging
import os
import sys
from typing import Any, Callable, Dict, List, TypeVar, Union

import pandas as pd

from Configs.CONSTANTS import CONSTANTS
# Placeholder type used in this module's annotations wherever a pandas DataFrame is meant.
# NOTE(review): the TypeVar is created with the name 'DataFrame' while it is bound to
# 'PandasDataFrame'; type checkers generally expect the two to match - confirm before renaming.
PandasDataFrame = TypeVar('DataFrame')
# Module metadata.
# NOTE(review): the header comment of this file is an Apache 2.0 notice dated 2017, whereas
# __license__ says "GPL" and __copyright__ says 2016 - confirm which one is authoritative.
__author__ = "Mohsen Mesgarpour"
__copyright__ = "Copyright 2016, https://github.com/mesgarpour"
__credits__ = ["Mohsen Mesgarpour"]
__license__ = "GPL"
__version__ = "1.1"
__maintainer__ = "Mohsen Mesgarpour"
__email__ = "mohsen.mesgarpour@gmail.com"
__status__ = "Release"
class CsvFile:
    """Read, write and inspect a single Comma Separated Values (CSV) file.

    The target file is selected with :meth:`set`. Every I/O failure is logged
    through the application logger and then terminates the interpreter via
    ``sys.exit()`` (i.e. ``SystemExit`` is raised).
    """

    def __init__(self):
        """Initialise the objects and constants.
        """
        self.__logger = logging.getLogger(CONSTANTS.app_name)
        self.__logger.debug(__name__)
        self.__path = None

    def __fail(self,
               message: str,
               exc_info: bool = False):
        """Log an error and terminate the interpreter.
        :param message: the error message to log.
        :param exc_info: indicates if the traceback of the exception being handled must be logged too.
        """
        self.__logger.error(message, exc_info=exc_info)
        # NOTE(review): sys.exit() without an argument exits with status 0;
        # kept as-is for backward compatibility, but a non-zero status may be preferable.
        sys.exit()

    def set(self,
            path: str,
            title: str,
            ext: str = "csv"):
        """Set the CSV file for reading or writing.
        :param path: the directory path of the CSV file.
        :param title: the file name of the CSV file.
        :param ext: the extension of the CSV file (default: 'csv').
        """
        self.__logger.debug("Set the CSV file.")
        self.__path = os.path.join(path, title + "." + ext)

    def reset(self):
        """Reset the CSV file reader/writer, by creating the file or truncating it to zero length.
        """
        self.__logger.debug("Reset the CSV File.")
        try:
            # opening in 'w' mode creates/truncates; the context manager guarantees the close
            with open(self.__path, 'w'):
                pass
        except OSError as e:
            self.__fail(__name__ + " - Can not open the file: " + str(self.__path) + "\n" + str(e))
        except Exception:
            self.__fail(__name__ + " - Could not create the file: \n" + str(self.__path), exc_info=True)

    def exists(self) -> bool:
        """Check if the CSV file exists.
        :return: indicates if the file exists.
        """
        self.__logger.debug("Check if the CSV file exists.")
        return os.path.isfile(self.__path)

    def exists_column(self,
                      column: str,
                      skip: int = 0) -> bool:
        """Check if a column exists in the header of the CSV file.
        :param column: name of the column.
        :param skip: lines to skip before reading the header line.
        :return: indicates if the column exists (False when the file has no header line after skipping).
        """
        self.__logger.debug("Check if a column exists in the CSV File.")
        try:
            with open(self.__path, "r", newline="") as f:
                for _ in range(skip):
                    if not f.readline():
                        # the file ends before the header line
                        return False
                # the csv reader removes the line terminator and any quoting of the header names
                header = next(csv.reader(f), [])
                return column in header
        except OSError as e:
            self.__fail(__name__ + " - Can not open the file: " + str(self.__path) + "\n" + str(e))
        except Exception:
            self.__fail(__name__ + " - Can not read the file: \n" + str(self.__path), exc_info=True)
        return False

    def read(self,
             skip: int = 0,
             dataframing: bool = True,
             **kwargs: Any) -> Union[List, pd.DataFrame]:
        """Read the CSV file into dataframe or list.
        :param skip: lines to skip before reading.
        :param dataframing: indicates if the outputs must be saved into dataframe.
        :param kwargs: any other arguments that the selected reader may accept
        (only forwarded when reading into a dataframe).
        :return: the read file contents.
        """
        self.__logger.debug("Read the CSV File.")
        if dataframing:
            return self.__read_dataframe(skip, **kwargs)
        return self.__read_array(skip)

    def __read_dataframe(self,
                         skip: int = 0,
                         **kwargs: Any) -> pd.DataFrame:
        """Read the CSV file into dataframe.
        :param skip: lines to skip before reading.
        :param kwargs: any other arguments that the selected reader may accept.
        :return: the read file contents.
        """
        self.__logger.debug("Read the CSV File into Dataframe.")
        try:
            return pd.read_csv(self.__path, skiprows=skip, **kwargs)
        except Exception:
            self.__fail(__name__ + " - Can not read the file into a dataframe: \n" + str(self.__path),
                        exc_info=True)

    def __read_array(self,
                     skip: int = 0) -> List:
        """Read the CSV file into array.
        :param skip: lines to skip before reading.
        :return: the read file contents, as a list of rows; each row is the list of
        comma-separated fields of one line, without the line terminator.
        """
        self.__logger.debug("Read the CSV File into array.")
        rows = []
        try:
            # the file is opened inside the try block, so that a failure to open it is reported too
            with open(self.__path, "r") as f:
                for index, line in enumerate(f):
                    if index >= skip:
                        rows.append(line.rstrip("\r\n").split(","))
        except OSError as e:
            self.__fail(__name__ + " - Can not open the file: " + str(self.__path) + "\n" + str(e))
        except Exception:
            self.__fail(__name__ + " - Can not read the file: \n" + str(self.__path), exc_info=True)
        return rows

    def append(self,
               data: Union[List, Dict, pd.DataFrame],
               **kwargs: Any) -> bool:
        """Append to CSV file using dataframe, dictionary or list.
        :param data: the data to write.
        :param kwargs: any other arguments that the selected writer may accept
        (only forwarded when writing a dataframe).
        :return: True when the data has been dispatched to a writer.
        """
        self.__logger.debug("Append to the CSV file.")
        if isinstance(data, pd.DataFrame):
            self.__append_dataframe(data, **kwargs)
        elif isinstance(data, list):
            self.__append_array(data)
        elif isinstance(data, dict):
            self.__append_dict(data)
        else:
            self.__fail(__name__ + " - Invalid object to write into file!\n" + str(type(data)))
        return True

    def __append_dataframe(self,
                           data: pd.DataFrame,
                           max_line_width: int = 100000000,
                           **kwargs: Any):
        """Append to CSV file using dataframe.
        :param data: the dataframe to write.
        :param max_line_width: max line width (PANDAS: display.width).
        :param kwargs: any other arguments that the selected writer may accept
        ('header' and 'index' default to False).
        """
        self.__logger.debug("Append a Dataframe to the CSV file.")
        kwargs.setdefault("header", False)
        kwargs.setdefault("index", False)
        try:
            # option_context restores the caller's previous display settings, even if the write fails.
            # NOTE(review): the display options are not expected to influence to_csv;
            # they are kept only to mirror the original behaviour.
            display_options = pd.option_context("display.width", max_line_width,
                                                "display.max_rows", data.shape[0],
                                                "display.max_columns", data.shape[1])
            # newline="" leaves the line terminators chosen by the writer untouched
            with open(self.__path, 'a', newline="") as f, display_options:
                data.to_csv(f, **kwargs)
        except Exception:
            self.__fail(__name__ + " - Can not append dataframe to file: \n" + str(self.__path),
                        exc_info=True)

    def __append_dict(self,
                      data: Dict[str, str]):
        """Append to CSV file using dictionary.
        A header line made of the dictionary keys is written before the values on every call.
        :param data: the dictionary to write.
        """
        self.__logger.debug("Append a Dictionary to the CSV file.")
        try:
            # newline="" prevents the text layer from translating the csv writer's line terminators
            with open(self.__path, 'a', newline="") as f:
                writer = csv.DictWriter(f, data.keys())
                writer.writeheader()
                writer.writerow(data)
        except OSError as e:
            self.__fail(__name__ + " - Can not open the file: " + str(self.__path) + "\n" + str(e))
        except Exception:
            self.__fail(__name__ + " - Can not append dictionary to file: \n" + str(self.__path),
                        exc_info=True)

    def __append_array(self,
                       data: List):
        """Append to CSV file using list.
        :param data: the list to write; a flat list is written as a single row,
        and a list of lists as one row per inner list. Cells are converted with str().
        """
        self.__logger.debug("Append an Array to the CSV file.")
        if data is None or data == "" or data == []:
            return
        if not isinstance(data, list):
            data = [[data]]
        elif not isinstance(data[0], list):
            data = [data]
        # write
        try:
            with open(self.__path, 'ab') as f:
                # one comma-joined line per row; str() lets non-string cells (e.g. numbers) be written
                f.writelines((",".join(map(str, row)) + "\n").encode() for row in data)
        except OSError as e:
            self.__fail(__name__ + " - Can not open the file: " + str(self.__path) + "\n" + str(e))
        except Exception:
            self.__fail(__name__ + " - Can not write a row into the file: \n" + str(self.__path),
                        exc_info=True)

    def size(self) -> int:
        """Check number of lines in the CSV file.
        :return: number of lines in the file.
        """
        self.__logger.debug("Check number of lines in the CSV file.")
        cnt_lines = 0
        try:
            with open(self.__path, "r") as f:
                cnt_lines = sum(1 for _ in f)
        except OSError as e:
            self.__fail(__name__ + " - Can not open the file: " + str(self.__path) + "\n" + str(e))
        except Exception:
            self.__fail(__name__ + " - Can not read the file: " + str(self.__path), exc_info=True)
        return cnt_lines