"""Helpers for predicting peptide stability in simulated GI fluids.

The module offers three steps that are meant to be chained by a caller:

* ``pep_feat``      - compute RDKit descriptors for every SMILES in a CSV file.
* ``model_predict`` - run a pickled SIF or SGF model on those descriptors.
* ``save_results``  - write both predictions back into the CSV file.
"""

import os
import pickle

import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
from tqdm import tqdm

# Descriptor columns consumed by the SIF model.
_SIF_FEATURE_LIST = [
    'MinAbsEStateIndex', 'qed', 'MinPartialCharge', 'Chi1v', 'PEOE_VSA8',
    'SMR_VSA10', 'SMR_VSA4', 'SMR_VSA6', 'SlogP_VSA3', 'EState_VSA10',
    'EState_VSA2', 'EState_VSA6', 'EState_VSA8', 'EState_VSA9',
    'VSA_EState1', 'VSA_EState4', 'VSA_EState8',
]

# Descriptor columns consumed by the SGF model.
_SGF_FEATURE_LIST = [
    'ExactMolWt', 'NumHAcceptors', 'NumHDonors', 'MolLogP', 'TPSA',
    'NumRotatableBonds',
]

# Env code -> (descriptor columns, label passed to the model's 'GI_encoder',
#              file name of the pickled model inside ``model_dir``).
_ENV_SETTINGS = {
    'SIF': (_SIF_FEATURE_LIST, 'Intestinal', 'SIF_model'),
    'SGF': (_SGF_FEATURE_LIST, 'Gastric', 'SGF_model'),
}


def model_predict(feat, Env='SIF', model_dir='model'):
    """Predict stability labels for the rows of ``feat``.

    Args:
        feat: DataFrame of molecular descriptors (e.g. the output of
            ``pep_feat``). It must contain every column the selected model
            uses; pandas raises ``KeyError`` for missing columns.
        Env: ``'SIF'`` or ``'SGF'``; selects the feature subset, the
            environment label and the model file.
        model_dir: Directory holding the pickled ``SIF_model`` / ``SGF_model``
            files. The default ``'model'`` is resolved against the current
            working directory, exactly as before this parameter existed.

    Returns:
        Whatever ``model['Label_encoder'].inverse_transform`` returns for the
        predicted classes (presumably a NumPy array with one row per input
        row - confirm against the encoder stored in the model file).

    Raises:
        KeyError: If ``Env`` is neither ``'SIF'`` nor ``'SGF'``.
    """
    if not isinstance(Env, str) or Env not in _ENV_SETTINGS:
        raise KeyError('Wrong Env Set, should be either SIF or SGF')
    feature_names, env_label, model_file = _ENV_SETTINGS[Env]

    print('Prediting {} Stability...'.format(Env))
    features = feat[feature_names]

    # SECURITY: unpickling can execute arbitrary code. Only point ``model_dir``
    # at model files that come from a trusted source.
    with open(os.path.join(model_dir, model_file), 'rb') as model_file_handle:
        model = pickle.load(model_file_handle)

    # One constant environment label per sample, shaped (n_samples, 1) for the
    # encoder. dtype=object matches what a pandas string column converts to.
    env_column = np.full((len(features), 1), env_label, dtype=object)
    encoded_env = model['GI_encoder'].transform(env_column)

    # Scale the descriptors with the scaler stored alongside the classifier.
    scaled_features = model['feature_scaler'].transform(np.array(features))

    # The classifier expects the encoded environment first, then descriptors.
    model_input = np.concatenate([encoded_env, scaled_features], axis=1)

    y_pred = model['clf'].predict(model_input)
    return model['Label_encoder'].inverse_transform(y_pred.reshape(-1, 1))


def pep_feat(SMILES_list_PATH):
    """Compute all RDKit descriptors for the SMILES listed in a CSV file.

    Args:
        SMILES_list_PATH: Path of a CSV file with a ``SMILES`` column.

    Returns:
        A DataFrame with one row per CSV row and one column per descriptor
        name in ``rdkit.Chem.Descriptors._descList``.

    Raises:
        ValueError: If a ``SMILES`` entry is not a string or cannot be parsed
            by RDKit (``Chem.MolFromSmiles`` returns ``None`` in that case).
    """
    pep_db = pd.read_csv(SMILES_list_PATH)
    smiles_column = pep_db['SMILES']

    des_list = [entry[0] for entry in Descriptors._descList]
    calculator = MoleculeDescriptors.MolecularDescriptorCalculator(des_list)

    # Pre-sized so that an empty CSV still yields a (0, n_descriptors) frame.
    feat = np.zeros([len(smiles_column), len(des_list)])

    for row, smiles in enumerate(tqdm(smiles_column)):
        # Missing cells are read by pandas as NaN (a float), not a string.
        mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
        if mol is None:
            raise ValueError(
                'Invalid SMILES at row {}: {!r}'.format(row, smiles)
            )
        feat[row] = calculator.CalcDescriptors(mol)

    return pd.DataFrame(feat, columns=des_list)


def save_results(SMILES_list_PATH, SIF_Stability, SGF_Stability):
    """Add both stability predictions to the CSV file and save it in place.

    Note that the input file is overwritten: the CSV at ``SMILES_list_PATH``
    is re-read, extended with the two prediction columns and written back to
    the same path.

    Args:
        SMILES_list_PATH: Path of the CSV file that was used for prediction.
        SIF_Stability: Values for the new ``Stability_in_SIF`` column.
        SGF_Stability: Values for the new ``Stability_in_SGF`` column.

    Returns:
        The DataFrame that was written to disk.
    """
    pep_db = pd.read_csv(SMILES_list_PATH).assign(
        Stability_in_SIF=SIF_Stability,
        Stability_in_SGF=SGF_Stability,
    )
    pep_db.to_csv(SMILES_list_PATH, index=False)
    print('Predicted SIF/SGF stability saved to the original file: ',SMILES_list_PATH)
    return pep_db