{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "d3426c73-9556-4223-a0f4-afdd5edbd911", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import torch\n", "import xgboost as xgb\n", "from scipy.stats import spearmanr\n", "from sklearn.model_selection import train_test_split, GridSearchCV, KFold\n", "from sklearn.metrics import mean_squared_error\n", "from transformers import AutoTokenizer, AutoModel" ] }, { "cell_type": "code", "execution_count": 2, "id": "cc1849cc-efb9-4eb9-946e-3ce2a0480fee", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of examples is: 560\n" ] }, { "data": { "text/html": [ "
\n", " | CANONICAL_SMILES | \n", "pIC50 | \n", "
---|---|---|
0 | \n", "Nc1nc(N)c2c(Sc3ccccc3)cccc2n1 | \n", "6.21 | \n", "
1 | \n", "COc1ccc(OC)c(Cc2sc3nc(N)nc(N)c3c2C)c1 | \n", "6.14 | \n", "
2 | \n", "CN(Cc1coc2nc(N)nc(N)c12)c3ccc(cc3)C(=O)N[C@@H]... | \n", "6.66 | \n", "
3 | \n", "Nc1nc(N)c2nc(CSc3ccc(cc3)C(=O)NC(CCC(=O)O)C(=O... | \n", "5.57 | \n", "
4 | \n", "Nc1nc(N)c2nc(CCSc3ccc(cc3)C(=O)NC(CCC(=O)O)C(=... | \n", "4.60 | \n", "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=0.9, device=None, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=None, grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=0.1, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=4, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " multi_strategy=None, n_estimators=200, n_jobs=None,\n", " num_parallel_tree=None, random_state=42, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=0.9, device=None, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=None, grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=0.1, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=4, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " multi_strategy=None, n_estimators=200, n_jobs=None,\n", " num_parallel_tree=None, random_state=42, ...)