Diff of /data_preprocessing.ipynb [000000] .. [134fd7]

Switch to unified view

a b/data_preprocessing.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": null,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "from clinical_ts.timeseries_utils import *\n",
10
    "from clinical_ts.ecg_utils import *"
11
   ]
12
  },
13
  {
14
   "cell_type": "markdown",
15
   "metadata": {},
16
   "source": [
17
    "# prepare data"
18
   ]
19
  },
20
  {
21
   "cell_type": "code",
22
   "execution_count": null,
23
   "metadata": {},
24
   "outputs": [],
25
   "source": [
26
    "target_fs=100\n",
27
    "data_root=Path(\"./ecg_data/\")\n",
28
    "target_root=Path(\"./ecg_data_processed\")"
29
   ]
30
  },
31
  {
32
   "cell_type": "markdown",
33
   "metadata": {},
34
   "source": [
35
    "## Ribeiro 2020"
36
   ]
37
  },
38
  {
39
   "cell_type": "markdown",
40
   "metadata": {},
41
   "source": [
42
    "Download the test set from Ribeiro et al. 2020 (https://www.nature.com/articles/s41467-020-15432-4), available at https://doi.org/10.5281/zenodo.3625006, and place it in `data_folder_ribeiro_test`."
43
   ]
44
  },
45
  {
46
   "cell_type": "code",
47
   "execution_count": null,
48
   "metadata": {},
49
   "outputs": [],
50
   "source": [
51
    "data_folder_ribeiro_test = data_root/\"ribeiro2020_test\"\n",
52
    "target_folder_ribeiro_test = target_root/(\"ribeiro_fs\"+str(target_fs))"
53
   ]
54
  },
55
  {
56
   "cell_type": "code",
57
   "execution_count": null,
58
   "metadata": {},
59
   "outputs": [],
60
   "source": [
61
    "\n",
62
    "df_ribeiro_test, lbl_itos_ribeiro_test,  mean_ribeiro_test, std_ribeiro_test = prepare_data_ribeiro_test(data_folder_ribeiro_test, target_fs=target_fs, channels=12, channel_stoi=channel_stoi_default, target_folder=target_folder_ribeiro_test)"
63
   ]
64
  },
65
  {
66
   "cell_type": "code",
67
   "execution_count": null,
68
   "metadata": {},
69
   "outputs": [],
70
   "source": [
71
    "# Convert the processed Ribeiro data to memmap format for efficiency\n",
72
    "reformat_as_memmap(df_ribeiro_test, target_folder_ribeiro_test / \"memmap.npy\", data_folder=target_folder_ribeiro_test, delete_npys=True)"
73
   ]
74
  },
75
  {
76
   "cell_type": "markdown",
77
   "metadata": {},
78
   "source": [
79
    "## Zheng 2020"
80
   ]
81
  },
82
  {
83
   "cell_type": "markdown",
84
   "metadata": {},
85
   "source": [
86
    "Download the dataset from Zheng et al. 2020 (https://www.nature.com/articles/s41597-020-0386-x), available at https://figshare.com/collections/ChapmanECG/4560497/2, and place it in `data_folder_zheng`."
87
   ]
88
  },
89
  {
90
   "cell_type": "code",
91
   "execution_count": null,
92
   "metadata": {},
93
   "outputs": [],
94
   "source": [
95
    "data_folder_zheng = data_root/\"zheng2020/\"\n",
96
    "target_folder_zheng = target_root/(\"zheng_fs\"+str(target_fs))"
97
   ]
98
  },
99
  {
100
   "cell_type": "code",
101
   "execution_count": null,
102
   "metadata": {},
103
   "outputs": [],
104
   "source": [
105
    "\n",
106
    "df_zheng, lbl_itos_zheng,  mean_zheng, std_zheng = prepare_data_zheng(data_folder_zheng, denoised=False, target_fs=target_fs, channels=12, channel_stoi=channel_stoi_default, target_folder=target_folder_zheng)"
107
   ]
108
  },
109
  {
110
   "cell_type": "code",
111
   "execution_count": null,
112
   "metadata": {},
113
   "outputs": [],
114
   "source": [
115
    "# Convert the processed Zheng data to memmap format for efficiency\n",
116
    "reformat_as_memmap(df_zheng, target_folder_zheng / \"memmap.npy\", data_folder=target_folder_zheng, delete_npys=True)"
117
   ]
118
  },
119
  {
120
   "cell_type": "markdown",
121
   "metadata": {},
122
   "source": [
123
    "## CinC2020 Challenge"
124
   ]
125
  },
126
  {
127
   "cell_type": "markdown",
128
   "metadata": {},
129
   "source": [
130
    "Download the training set of the CinC Challenge 2020 from https://physionetchallenges.org/2020/ and place it in `data_folder_cinc`."
131
   ]
132
  },
133
  {
134
   "cell_type": "code",
135
   "execution_count": null,
136
   "metadata": {},
137
   "outputs": [],
138
   "source": [
139
    "data_folder_cinc = data_root/\"cinc2020/\"\n",
140
    "target_folder_cinc = target_root/(\"cinc_fs\"+str(target_fs))"
141
   ]
142
  },
143
  {
144
   "cell_type": "code",
145
   "execution_count": null,
146
   "metadata": {},
147
   "outputs": [],
148
   "source": [
149
    "\n",
150
    "df_cinc, lbl_itos_cinc,  mean_cinc, std_cinc = prepare_data_cinc(data_folder_cinc, target_fs=target_fs, channels=12, channel_stoi=channel_stoi_default, target_folder=target_folder_cinc)"
151
   ]
152
  },
153
  {
154
   "cell_type": "code",
155
   "execution_count": null,
156
   "metadata": {},
157
   "outputs": [],
158
   "source": [
159
    "# Convert the processed CinC data to memmap format for efficiency\n",
160
    "reformat_as_memmap(df_cinc, target_folder_cinc / \"memmap.npy\", data_folder=target_folder_cinc, delete_npys=True)\n"
161
   ]
162
  },
163
  {
164
   "cell_type": "markdown",
165
   "metadata": {},
166
   "source": [
167
    "## PTB-XL"
168
   ]
169
  },
170
  {
171
   "cell_type": "markdown",
172
   "metadata": {},
173
   "source": [
174
    "Download the PTB-XL dataset (https://www.nature.com/articles/s41597-020-0495-6) from https://physionet.org/content/ptb-xl/1.0.1/ and place it in `data_folder_ptb_xl`."
175
   ]
176
  },
177
  {
178
   "cell_type": "code",
179
   "execution_count": null,
180
   "metadata": {},
181
   "outputs": [],
182
   "source": [
183
    "data_folder_ptb_xl = data_root/\"ptb_xl/\"\n",
184
    "target_folder_ptb_xl = target_root/(\"ptb_xl_fs\"+str(target_fs))"
185
   ]
186
  },
187
  {
188
   "cell_type": "code",
189
   "execution_count": null,
190
   "metadata": {},
191
   "outputs": [],
192
   "source": [
193
    "# Pass channels=12 explicitly, matching the other dataset cells; the previous `input_channels` is not assigned anywhere in this notebook\n",
    "df_ptb_xl, lbl_itos_ptb_xl,  mean_ptb_xl, std_ptb_xl = prepare_data_ptb_xl(data_folder_ptb_xl, min_cnt=0, target_fs=target_fs, channels=12, channel_stoi=channel_stoi_default, target_folder=target_folder_ptb_xl)"
194
   ]
195
  },
196
  {
197
   "cell_type": "code",
198
   "execution_count": null,
199
   "metadata": {},
200
   "outputs": [],
201
   "source": [
202
    "# Convert the processed PTB-XL data to memmap format for efficiency\n",
203
    "reformat_as_memmap(df_ptb_xl, target_folder_ptb_xl / \"memmap.npy\", data_folder=target_folder_ptb_xl, delete_npys=True)"
204
   ]
205
  }
206
 ],
207
 "metadata": {
208
  "kernelspec": {
209
   "display_name": "Python 3",
210
   "language": "python",
211
   "name": "python3"
212
  },
213
  "language_info": {
214
   "codemirror_mode": {
215
    "name": "ipython",
216
    "version": 3
217
   },
218
   "file_extension": ".py",
219
   "mimetype": "text/x-python",
220
   "name": "python",
221
   "nbconvert_exporter": "python",
222
   "pygments_lexer": "ipython3",
223
   "version": "3.8.3"
224
  }
225
 },
226
 "nbformat": 4,
227
 "nbformat_minor": 2
228
}