Diff of /lib/pred_util.py [000000] .. [132747]

Switch to side-by-side view

--- a
+++ b/lib/pred_util.py
@@ -0,0 +1,59 @@
+import numpy as np 
+import pandas as pd   
+import pickle
+
+from rdkit import Chem 
+from rdkit.Chem import Descriptors
+from rdkit.ML.Descriptors import MoleculeDescriptors
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+
+def model_predict(feat,Env = 'SIF'):
+    SIF_FEATURE_LIST = ['MinAbsEStateIndex','qed','MinPartialCharge','Chi1v','PEOE_VSA8','SMR_VSA10','SMR_VSA4','SMR_VSA6','SlogP_VSA3','EState_VSA10','EState_VSA2','EState_VSA6','EState_VSA8','EState_VSA9','VSA_EState1','VSA_EState4','VSA_EState8']
+    SGF_FEATURE_LIST = ['ExactMolWt','NumHAcceptors','NumHDonors','MolLogP','TPSA','NumRotatableBonds']
+
+    if Env == 'SIF':
+        print('Prediting SIF Stability...')
+        feat = feat[SIF_FEATURE_LIST]
+        df_pred=feat.assign(Env='Intestinal')
+        model = pickle.load(open('model/SIF_model', 'rb'))
+    elif Env == 'SGF':
+        feat = feat[SGF_FEATURE_LIST]
+        print('Prediting SGF Stability...')
+        df_pred=feat.assign(Env='Gastric')
+        model = pickle.load(open('model/SGF_model', 'rb'))
+    else:
+        raise KeyError('Wrong Env Set, should be either SIF or SGF')
+    
+
+    pred_Env = model['GI_encoder'].transform(np.array(df_pred['Env']).reshape(-1, 1))
+    pred_features=feat
+
+    #PCA
+    pred_features = model['feature_scaler'].transform(np.array(pred_features))
+    pred_Features = np.concatenate([pred_Env,pred_features],axis=1)
+
+    y_pred = model['clf'].predict(pred_Features)
+    y_pred = model['Label_encoder'].inverse_transform(y_pred.reshape(-1, 1))
+    return (y_pred)
+
+def pep_feat(SMILES_list_PATH):
+    pep_db = pd.read_csv(SMILES_list_PATH)
+    des_list = [x[0] for x in Descriptors._descList]
+    feat=np.zeros([len(pep_db['SMILES']),len(des_list)])
+    calculator = MoleculeDescriptors.MolecularDescriptorCalculator(des_list)
+
+    for i in tqdm(range(len(pep_db))):
+        mol = Chem.MolFromSmiles(pep_db['SMILES'][i])
+        feat[i] = calculator.CalcDescriptors(mol)
+
+    return pd.DataFrame(feat,columns=des_list)
+
+def save_results(SMILES_list_PATH,SIF_Stability,SGF_Stability):
+    pep_db = pd.read_csv(SMILES_list_PATH)
+    pep_db=pep_db.assign(Stability_in_SIF=SIF_Stability)
+    pep_db=pep_db.assign(Stability_in_SGF=SGF_Stability)
+    pep_db.to_csv(SMILES_list_PATH,index=False)
+    print('Predicted SIF/SGF stability saved to the original file: ',SMILES_list_PATH)
+    return pep_db
\ No newline at end of file