Switch to unified view

a b/notebooks/data/processing.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "import warnings\n",
10
    "import pandas as pd\n",
11
    "\n",
12
    "warnings.filterwarnings('ignore')"
13
   ]
14
  },
15
  {
16
   "cell_type": "code",
17
   "execution_count": 2,
18
   "metadata": {},
19
   "outputs": [],
20
   "source": [
21
    "# Load the raw original and synthetic datasets.\n",
22
    "original = pd.read_csv('../../data/raw/original.csv')\n",
23
    "synthetic = pd.read_csv('../../data/raw/synthetic.csv')"
24
   ]
25
  },
26
  {
27
   "cell_type": "code",
28
   "execution_count": 3,
29
   "metadata": {},
30
   "outputs": [],
31
   "source": [
32
    "# Process the datasets: drop duplicate rows, then rows with missing values.\n",
33
    "original = original.drop_duplicates().dropna()\n",
34
    "synthetic = synthetic.drop_duplicates().dropna()"
35
   ]
36
  },
37
  {
38
   "cell_type": "code",
39
   "execution_count": 4,
40
   "metadata": {},
41
   "outputs": [],
42
   "source": [
43
    "# Feature engineering: map GENDER (M -> 1, F -> 2) and LUNG_CANCER (YES -> 1, NO -> 0) to numeric codes.\n",
44
    "original[['GENDER', 'LUNG_CANCER']] = original[['GENDER', 'LUNG_CANCER']].replace({'M': 1, 'F': 2, 'YES': 1, 'NO': 0})\n",
45
    "synthetic[['GENDER', 'LUNG_CANCER']] = synthetic[['GENDER', 'LUNG_CANCER']].replace({'M': 1, 'F': 2, 'YES': 1, 'NO': 0})"
46
   ]
47
  },
48
  {
49
   "cell_type": "code",
50
   "execution_count": 5,
51
   "metadata": {},
52
   "outputs": [
53
    {
54
     "name": "stdout",
55
     "output_type": "stream",
56
     "text": [
57
      "Dataset processed.\n"
58
     ]
59
    }
60
   ],
61
   "source": [
62
    "# Save the processed datasets.\n",
63
    "original.to_csv('../../data/processed/original.csv', index=False)\n",
64
    "synthetic.to_csv('../../data/processed/synthetic.csv', index=False)\n",
65
    "\n",
66
    "print('Dataset processed.')"
67
   ]
68
  }
69
 ],
70
 "metadata": {
71
  "kernelspec": {
72
   "display_name": "Python 3",
73
   "language": "python",
74
   "name": "python3"
75
  },
76
  "language_info": {
77
   "codemirror_mode": {
78
    "name": "ipython",
79
    "version": 3
80
   },
81
   "file_extension": ".py",
82
   "mimetype": "text/x-python",
83
   "name": "python",
84
   "nbconvert_exporter": "python",
85
   "pygments_lexer": "ipython3",
86
   "version": "3.12.2"
87
  }
88
 },
89
 "nbformat": 4,
90
 "nbformat_minor": 2
91
}