70 lines (69 with data), 1.5 kB
{
"cells": [
{
"cell_type": "markdown",
"id": "8468ad0d",
"metadata": {},
"source": [
"# Undersampling"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "cfc764c8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1694\n",
"Done\n"
]
}
],
"source": [
"import os\n",
"import random\n",
"import shutil\n",
"\n",
"source = r\"F:\\Leuk study re-designed\\C-NMC\\High imbalance\\Train - 1 to 102 ratio\\enhanched\\hem\"\n",
"dest = r\"F:\\Leuk study re-designed\\C-NMC\\High imbalance\\Majority Calss Undersample\\enhanched\\hem\"\n",
"files = os.listdir(source)\n",
"\n",
"percentage = 50 #High Imbalance\n",
"# percentage = 10 #Low Imbalance\n",
"\n",
"no_of_files = int((percentage/100)*len(files))\n",
"print(no_of_files)\n",
"# no_of_files = len(files) // 5\n",
"\n",
"for file_name in random.sample(files, no_of_files):\n",
" shutil.copy(os.path.join(source, file_name), dest)\n",
"print('Done')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "leukemia",
"language": "python",
"name": "leukemia"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}