|
a |
|
b/development/csv2db/csv2db.py |
|
|
1 |
import re |
|
|
2 |
import math |
|
|
3 |
import MySQLdb |
|
|
4 |
import pandas as pd |
|
|
5 |
import mysql.connector |
|
|
6 |
from sqlalchemy import create_engine |
|
|
7 |
|
|
|
8 |
|
|
|
9 |
class CSV2DB(): |
|
|
10 |
def __init__(self, csv_path, password, database, |
|
|
11 |
table, host, user, port, columns): |
|
|
12 |
|
|
|
13 |
super(CSV2DB, self).__init__ |
|
|
14 |
self.df = pd.read_csv(csv_path) |
|
|
15 |
self.columns = columns |
|
|
16 |
self.database = database |
|
|
17 |
self.table = table |
|
|
18 |
self.host = host |
|
|
19 |
self.port = port |
|
|
20 |
self.user = user |
|
|
21 |
self.password = password |
|
|
22 |
self.db = mysql.connector.connect( |
|
|
23 |
host=self.host, |
|
|
24 |
user=self.user, |
|
|
25 |
password=self.password, |
|
|
26 |
port=self.port, |
|
|
27 |
) |
|
|
28 |
|
|
|
29 |
def select_column(self): |
|
|
30 |
if self.columns is not None: |
|
|
31 |
self.df = self.df[self.columns] |
|
|
32 |
|
|
|
33 |
def rm_multi_newline(self): |
|
|
34 |
self.df = self.df.replace(r'\n+', '\n', regex=True) |
|
|
35 |
|
|
|
36 |
def norm_column_name(self): |
|
|
37 |
columns = [column.replace('?', '') for column in self.df.columns] |
|
|
38 |
columns = [re.sub('-| ', '_', column) for column in columns] |
|
|
39 |
self.df.columns = columns |
|
|
40 |
|
|
|
41 |
def check_length(self): |
|
|
42 |
df_length = self.df.astype('str').applymap(lambda x: len(x)).max() |
|
|
43 |
self.name_length = df_length.iloc[0] |
|
|
44 |
|
|
|
45 |
if max(df_length) > 65535: |
|
|
46 |
print(f'Error: Length of some fields in the "{df_length.idxmax()}" column is too large.\n\ |
|
|
47 |
This might cause you to lose some parts of your data during conversion\n\ |
|
|
48 |
Please use other dtypes than TEXT in to_sql command. Ex. MEDIUMTEXT or LONGTEXT') |
|
|
49 |
exit() |
|
|
50 |
|
|
|
51 |
def create_database(self): |
|
|
52 |
cursor = self.db.cursor() |
|
|
53 |
cursor.execute(f"CREATE DATABASE IF NOT EXISTS {self.database}") |
|
|
54 |
|
|
|
55 |
def create_engine(self): |
|
|
56 |
self.engine = create_engine( |
|
|
57 |
f"mysql+mysqldb://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}?charset=utf8") |
|
|
58 |
|
|
|
59 |
def to_sql(self): |
|
|
60 |
self.df.to_sql(self.table, self.engine, if_exists='fail', index=True) |
|
|
61 |
|
|
|
62 |
def add_primary_key(self): |
|
|
63 |
with self.engine.connect() as con: |
|
|
64 |
con.execute(f'ALTER TABLE `{self.table}` ADD PRIMARY KEY (`index`);') |
|
|
65 |
|
|
|
66 |
def change_type(self): |
|
|
67 |
name_length = int(math.ceil(self.name_length / 500.0)) * 500 |
|
|
68 |
name_column = self.df.columns[0] |
|
|
69 |
|
|
|
70 |
with self.engine.connect() as con: |
|
|
71 |
con.execute(f'ALTER TABLE `{self.table}` MODIFY `{name_column}` VARCHAR({name_length});') |
|
|
72 |
|
|
|
73 |
|