[7bf731]: / 03-Experiments / __pycache__ / Experiments.cpython-310.pyc

Download this file

44 lines (44 with data), 6.9 kB

o

Õ6(fÚã@s
ddlZddlmZddlmZddlZddl	Z
ddlmZddlm
Z
ddlZddlmZdd„Zdd	„Zd
d„Zdd
„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd „Zd!d"„Zd#d$„Z d%d&„Z!d'd(„Z"d)d*„Z#d+d,„Z$d0d.d/„Z%dS)1éN)Útrain_test_split)ÚMinMaxScaler)ÚPolynomialFeatures)Úaccuracy_scorecCsŒt |¡}t|ddd\}}t|ddd\}}|jdgdd ¡jdd	}|jdgdd ¡jdd	}|jdgdd ¡jdd	}|||fS)
NgffffffÖ?é*)Z	test_sizeÚrandom_stategš™™™™™É?Úidé©ZaxisT)Údrop)ÚpdZread_csvrrZdrop_duplicatesZreset_index)ÚpathÚdfÚtrain_dfZtest_dfZval_df©rú=/Users/arham/Downloads/Projects/03-Experiments/experiments.pyÚ	load_datas

rcCs*ddddddddœ}|d	 |¡|d	<|S)
Nrr	ééééé)ZInsufficient_WeightZ
Normal_WeightZOverweight_Level_IZOverweight_Level_IIZObesity_Type_IZObesity_Type_IIZObesity_Type_IIIZ
NObeyesdad©Úmap)ÚtrainZ
target_keyrrrÚ
encode_targetsrcCs|d dddœ¡|d<dS)NÚGenderrr	)ÚMaleÚFemaler©rrrrÚmake_gender_binarysr cCs:|d t¡|d<|d t¡|d<|d t¡|d<|S)NÚWeightÚAgeÚHeight)ÚastypeÚfloatrrrrÚ	datatypessr&cCsdg}|dD]%}|dkrd}n|dkrd}n|dkrd}n	|dkr$d	}nd
}| |¡q||d<|S)Nr"ér	éré(ré2rrZ	Age_Group)Úappend)rZ
age_groupsZageZ	age_grouprrrÚage_binning)sr,cCó(|d t¡|d<t |d¡|d<|S)Nr"ZLog_Age©r$r%ÚnpÚlog1p©rrrrÚage_scaling_log:ór2cCó<|d t¡|d<tƒ}| |dj dd¡¡|d<||fS)Nr"éÿÿÿÿr	Ú
Scaled_Age©r$r%rÚ
fit_transformÚvaluesÚreshape)rÚ
scaler_agerrrÚage_scaling_minmax?ór<cCr-)Nr!Z
Log_Weightr.r1rrrÚweight_scaling_logEr3r>cCr4)Nr!r5r	Ú
Scaled_Weightr7)rÚ
scaler_weightrrrÚweight_scaling_minmaxJr=rAcCst |d¡|d<|S)Nr#Z
Log_Height)r/r0r1rrrÚheight_scaling_logPsrBcCs*tƒ}| |dj dd¡¡|d<||fS)Nr#r5r	Ú
Scaled_Height)rr8r9r:)rÚ
scaler_heightrrrÚheight_scaling_minmaxTsrEcCs|d dddœ¡|d<|S)Nrr	r)rrrrrrrr YscCs@gd¢}|D]}|| dddœ¡||<|| t¡||<q|S)N)Zfamily_history_with_overweightZFAVCZSCCZSMOKEr	r)ÚyesÚno)rr$Úint)rZBinary_ColsÚcolrrrÚfix_binary_columns]s
rJcCs2ddg}|D]}|| dddddœ¡||<q|S)NZCAECZCALCrr	rr)rGZ	SometimesZ
FrequentlyÚAlwaysr)rZcat_colsrIrrrÚ
freq_cat_colsfsrLcCsntj|dgd}|d t¡|d<|d t¡|d<|d t¡|d<|d t¡|d<|d t¡|d<|S)z¯
    Public_Transportation    8692
    Automobile               1835
    Walking                   231
    Motorbike                  19
    Bike                       16
    ZMTRANS©ÚcolumnsZMTRANS_AutomobileZMTRANS_WalkingZMTRANS_MotorbikeZMTRANS_BikeZMTRANS_Public_Transportation)rZget_dummiesr$rHrrrrÚMtransms
rOcCs\|d|dd|d<tdd}| |ddg¡}tj|gd¢d}tj||gd	d
}|S)Nr!r#rZBMI)Zdegreer")zAge^2zAge^3zBMI^2z	Age * BMIzAge * BMI^2z
Age^2 * BMI^2rMr	r
)rr8rÚ	DataFrameÚconcat)rZpolynomial_featuresZX_polyZpoly_features_dfrrrÚother_featuress
rRcCs°t|ƒ}t|ƒ}t|ƒ}t|ƒ}| |dj dd¡¡|d<t|ƒ}| |dj dd¡¡|d<t|ƒ}| |dj dd¡¡|d<t	|ƒ}t
|ƒ}t|ƒ}t|ƒ}t
|ƒ}|S)	Nr"r5r	r6r!r?r#rC)r&rr,r2Ú	transformr9r:r>rBr rJrLrOrR)Útestr;r@rDrrrÚ
test_pipeline‹srUcCs t ||¡}tj||dd}|S)Néè)Znum_boost_round)ÚlgbZDatasetr)ÚparamsÚX_trainÚy_trainZ	lgb_trainÚmodelrrrÚtrain_modelsr\cCs&| |¡}dd„|Dƒ}t||ƒ}|S)NcSsg|]}t |¡‘qSr)r/Úargmax)Ú.0ÚyrrrÚ
<listcomp>¤sz"evaluate_model.<locals>.<listcomp>)Zpredictr)r[ÚX_valÚy_valZy_predÚaccuracyrrrÚevaluate_model¢s

rdc
CsÐdddd| ddd¡| dd	d
¡| ddd
¡| ddd¡| ddd¡ddœ
}d}t|ddd}g}| ||¡D]+\}}|j||j|}	}
|j||j|}}t||	|ƒ}
t|
|
|ƒ}| |¡q7t	 
|¡S)NZ
multiclasséZ
multi_loglossZgbdtÚ
learning_rateg{®Gázt?gà?Ú
num_leavesé
rVÚ	max_depthr5r'Úbagging_fractiong333333ã?gffffffî?Úfeature_fraction)
Ú	objectiveZ	num_classZmetricZ
boosting_typerfrgrirjrkÚ	verbosityrTr)Ún_splitsÚshuffler)Zsuggest_loguniformZsuggest_intZsuggest_uniformZStratifiedKFoldÚsplitÚilocr\rdr+r/Úmean)ÚtrialrYrZrXrnZkfZscoresZtrain_indexZ	val_indexZX_trraZy_trrbr[rcrrrrl¨s*ö

rlrcs*tjdd}|j‡‡fdd„|d|jS)NZmaximize)Ú	directioncst|ˆˆƒS)N)rl)rs©rYrZrrÚ<lambda>Æsz*optimize_hyperparameters.<locals>.<lambda>)Ún_trials)ZoptunaZcreate_studyÚoptimizeZbest_params)rYrZrwZstudyrrurÚoptimize_hyperparametersÄsry)r)&ÚpandasrZsklearn.model_selectionrÚmatplotlib.pyplotÚpyplotÚpltZseabornZsnsÚnumpyr/Zsklearn.preprocessingrrZlightgbmrWZsklearn.metricsrrrr r&r,r2r<r>rArBrErJrLrOrRrUr\rdrlryrrrrÚ<module>s<