Switch to unified view

a b/development/csv2db/csv2db.py
1
import re
2
import math
3
import MySQLdb
4
import pandas as pd
5
import mysql.connector
6
from sqlalchemy import create_engine
7
8
9
class CSV2DB():
10
    def __init__(self, csv_path, password, database, 
11
                 table, host, user, port, columns):
12
    
13
        super(CSV2DB, self).__init__
14
        self.df = pd.read_csv(csv_path)
15
        self.columns = columns
16
        self.database = database
17
        self.table = table
18
        self.host = host
19
        self.port = port
20
        self.user = user
21
        self.password = password
22
        self.db = mysql.connector.connect(
23
            host=self.host,
24
            user=self.user,
25
            password=self.password,
26
            port=self.port,
27
        )
28
29
    def select_column(self):
30
        if self.columns is not None:
31
            self.df = self.df[self.columns]
32
33
    def rm_multi_newline(self):
34
        self.df = self.df.replace(r'\n+', '\n', regex=True)
35
36
    def norm_column_name(self):
37
        columns = [column.replace('?', '') for column in self.df.columns]
38
        columns = [re.sub('-| ', '_', column) for column in columns]
39
        self.df.columns = columns
40
41
    def check_length(self):
42
        df_length = self.df.astype('str').applymap(lambda x: len(x)).max()
43
        self.name_length = df_length.iloc[0]
44
45
        if max(df_length) > 65535:
46
            print(f'Error: Length of some fields in the "{df_length.idxmax()}" column is too large.\n\
47
This might cause you to lose some parts of your data during conversion\n\
48
Please use other dtypes than TEXT in to_sql command. Ex. MEDIUMTEXT or LONGTEXT')
49
            exit()
50
51
    def create_database(self):
52
        cursor = self.db.cursor()
53
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS {self.database}")
54
55
    def create_engine(self):
56
        self.engine = create_engine(
57
            f"mysql+mysqldb://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}?charset=utf8")
58
59
    def to_sql(self):
60
        self.df.to_sql(self.table, self.engine, if_exists='fail', index=True)
61
62
    def add_primary_key(self):
63
        with self.engine.connect() as con:
64
            con.execute(f'ALTER TABLE `{self.table}` ADD PRIMARY KEY (`index`);')
65
66
    def change_type(self):
67
        name_length = int(math.ceil(self.name_length / 500.0)) * 500
68
        name_column = self.df.columns[0]
69
70
        with self.engine.connect() as con:
71
            con.execute(f'ALTER TABLE `{self.table}` MODIFY `{name_column}` VARCHAR({name_length});')
72
73