# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import BatchNorm, LayerNorm, GATv2Conv
# PyTorch Lightning
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
# General
import numpy as np
import time
import wandb
# Own
from utils.pretrain_utils import sample_node_for_et, get_batched_data, get_edges, calc_metrics, plot_roc_curve, metrics_per_rel
from decoders import bilinear, trans, dot
# Global variables
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class NodeEmbeder(pl.LightningModule):
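    """GATv2-based node encoder trained with a link-prediction objective.

    Encodes nodes of a heterogeneous graph with stacked GATv2 layers and scores
    (source, relation, target) triples with a bilinear, translational, or
    dot-product decoder. Negative edges are generated on the fly by shuffling
    the positive targets.

    A minimal usage sketch (the dataloader names are illustrative, not part of
    this module):

        model = NodeEmbeder(all_data, edge_attr_dict, hp_dict=hparams, num_nodes=n)
        trainer = pl.Trainer(max_epochs=100)
        trainer.fit(model, train_dataloader, val_dataloader)
    """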
def __init__(self, all_data, edge_attr_dict, hp_dict=None, num_nodes=None, combined_training=False, spl_mat=[]):
super().__init__()
# save hyperparameters
self.save_hyperparameters("hp_dict", ignore=["spl_mat"])
# Data
self.all_data = all_data
self.edge_attr_dict = edge_attr_dict
# Model parameters
self.lr = self.hparams.hp_dict['lr']
self.lr_factor = self.hparams.hp_dict['lr_factor']
self.lr_patience = self.hparams.hp_dict['lr_patience']
self.lr_threshold = self.hparams.hp_dict['lr_threshold']
self.lr_threshold_mode = self.hparams.hp_dict['lr_threshold_mode']
self.lr_cooldown = self.hparams.hp_dict['lr_cooldown']
self.min_lr = self.hparams.hp_dict['min_lr']
self.eps = self.hparams.hp_dict['eps']
self.wd = self.hparams.hp_dict['wd']
self.decoder_type = self.hparams.hp_dict['decoder_type']
self.pred_threshold = self.hparams.hp_dict['pred_threshold']
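        # NOTE: shortest-path-length (SPL) features are disabled; the spl_mat argument is accepted but unused.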
self.use_spl = None
self.spl_mat = []
self.spl_dim = 0
self.nfeat = self.hparams.hp_dict['nfeat']
self.nhid1 = self.hparams.hp_dict['hidden'] * 2
self.nhid2 = self.hparams.hp_dict['hidden']
self.output = self.hparams.hp_dict['output']
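        # Learnable input features: one nfeat-dimensional embedding per node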
self.node_emb = nn.Embedding(num_nodes, self.nfeat)
self.num_nodes = num_nodes
self.num_relations = len(edge_attr_dict)
self.n_heads = self.hparams.hp_dict['n_heads']
self.dropout = self.hparams.hp_dict['dropout']
self.norm_method = self.hparams.hp_dict['norm_method']
# Select decoder
        if self.decoder_type == "bilinear": self.decoder = bilinear
        elif self.decoder_type == "trans": self.decoder = trans
        elif self.decoder_type == "dot": self.decoder = dot
        else: raise ValueError(f"Unknown decoder_type: {self.decoder_type}")
self.n_layers = 3
self.loss_type = self.hparams.hp_dict['loss']
self.combined_training = combined_training
# Conv layers
self.convs = torch.nn.ModuleList()
self.convs.append(GATv2Conv(self.nfeat, self.nhid1, self.n_heads)) # input = nfeat, output = nhid1*n_heads
if self.n_layers == 3:
self.convs.append(GATv2Conv(self.nhid1*self.n_heads, self.nhid2, self.n_heads)) # input = nhid1*n_heads, output = nhid2*n_heads
self.convs.append(GATv2Conv(self.nhid2*self.n_heads, self.output, self.n_heads)) # input = nhid2*n_heads, output = output*n_heads
else:
            self.convs.append(GATv2Conv(self.nhid1*self.n_heads, self.output, self.n_heads)) # input = nhid1*n_heads, output = output*n_heads
        # Learnable relation embeddings: one vector per edge type, consumed by the bilinear/trans decoders
        self.relation_weights = nn.Parameter(torch.empty(self.num_relations, self.output * self.n_heads))
        # Normalization (applied after every conv layer except the last)
if self.norm_method == "batch":
self.norms = torch.nn.ModuleList()
self.norms.append(BatchNorm(self.nhid1*self.n_heads))
self.norms.append(BatchNorm(self.nhid2*self.n_heads))
elif self.norm_method == "layer":
self.norms = torch.nn.ModuleList()
self.norms.append(LayerNorm(self.nhid1*self.n_heads))
self.norms.append(LayerNorm(self.nhid2*self.n_heads))
elif self.norm_method == "batch_layer":
self.batch_norms = torch.nn.ModuleList()
self.batch_norms.append(BatchNorm(self.nhid1*self.n_heads))
if self.n_layers == 3: self.batch_norms.append(BatchNorm(self.nhid2*self.n_heads))
self.layer_norms = torch.nn.ModuleList()
self.layer_norms.append(LayerNorm(self.nhid1*self.n_heads))
if self.n_layers == 3: self.layer_norms.append(LayerNorm(self.nhid2*self.n_heads))
self.reset_parameters()
def reset_parameters(self):
for conv in self.convs:
conv.reset_parameters()
nn.init.xavier_uniform_(self.relation_weights, gain = nn.init.calculate_gain('leaky_relu'))
def forward(self, n_ids, adjs):
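        """Encode a sampled subgraph.

        Args:
            n_ids: global IDs of the nodes in the sampled subgraph.
            adjs: one (edge_index, _, edge_type, size) tuple per conv layer,
                as produced by a layer-wise neighbor sampler.

        Returns:
            x: embeddings of the final layer's target nodes.
            gat_attn: per-layer (edge_index, attention_weights), with edge
                indices remapped to global node IDs.
        """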
x = self.node_emb(n_ids)
gat_attn = []
assert len(adjs) == self.n_layers
for i, (edge_index, _, edge_type, size) in enumerate(adjs):
# Update node embeddings
x_target = x[:size[1]] # Target nodes are always placed first.
x, (edge_i, alpha) = self.convs[i]((x, x_target), edge_index, return_attention_weights=True)
            edge_i = edge_i.detach().cpu()
            alpha = alpha.detach().cpu()
            # Remap batch-local indices to global node IDs on CPU, where edge_i lives
            n_ids_cpu = n_ids.detach().cpu()
            edge_i[0,:] = n_ids_cpu[edge_i[0,:]]
            edge_i[1,:] = n_ids_cpu[edge_i[1,:]]
gat_attn.append((edge_i, alpha))
# Normalize
if i != self.n_layers - 1:
if self.norm_method in ["batch", "layer"]:
x = self.norms[i](x)
elif self.norm_method == "batch_layer":
x = self.layer_norms[i](x)
x = F.leaky_relu(x)
if self.norm_method == "batch_layer":
x = self.batch_norms[i](x)
x = F.dropout(x, p=self.dropout, training=self.training)
return x, gat_attn
def get_negative_target_nodes(self, data, pos_target_embeds, curr_pos_target_embeds, all_edge_types):
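        """Sample one negative target per positive edge by permuting the positive targets.

        With 'all', targets are shuffled across the whole batch; with
        'by_edge_type', targets are only shuffled among edges of the same relation.
        """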
if self.hparams.hp_dict['negative_sampler_approach'] == 'all':
# get negative targets by shuffling positive targets
if 'index_to_node_features_pos' in data:
rand_index = torch.randperm(data.index_to_node_features_pos.size(0))
else:
rand_index = torch.randperm(curr_pos_target_embeds.size(0))
        elif self.hparams.hp_dict['negative_sampler_approach'] == 'by_edge_type':
            # get negative targets by shuffling positive targets within each edge type
            et_ids = all_edge_types.unique()
            targets_dict = self.create_target_dict(all_edge_types, et_ids) # indices into all_edge_types for each edge type
            rand_index = torch.tensor(np.vectorize(sample_node_for_et)(all_edge_types.cpu(), targets_dict)).to(device)
        else:
            raise ValueError(f"Unknown negative_sampler_approach: {self.hparams.hp_dict['negative_sampler_approach']}")
if 'index_to_node_features_pos' in data:
index_to_node_features_neg = data.index_to_node_features_pos[rand_index] #NOTE: currently possible to get the same node as positive & negative target
curr_neg_target_embeds = pos_target_embeds[index_to_node_features_neg,:]
else:
curr_neg_target_embeds = curr_pos_target_embeds[rand_index,:]
return curr_neg_target_embeds
def create_target_dict(self, all_edge_types, et_ids):
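        """Map each edge type ID to the indices of its edges in all_edge_types."""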
targets_dict = {}
for k in et_ids:
indices = (all_edge_types == int(k)).nonzero().cpu()
targets_dict[int(k)] = indices
return targets_dict
def decode(self, data, source_embeds, pos_target_embeds, all_edge_types):
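        """Score positive edges and freshly sampled negative edges.

        Returns the data object (with data.all_edge_types extended to cover both
        halves) and the raw decoder scores, with the positive edges stacked
        before the negative ones.
        """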
curr_source_embeds = source_embeds[data.index_to_node_features_pos,:]
curr_pos_target_embeds = pos_target_embeds[data.index_to_node_features_pos,:]
ts = time.time()
curr_neg_target_embeds = self.get_negative_target_nodes(data, pos_target_embeds, curr_pos_target_embeds, all_edge_types)
te = time.time()
if self.hparams.hp_dict['time']:
print(f"Negative sampling took {te - ts:0.4f} seconds")
# Get source & targets for pos & negative edges
source = torch.cat([curr_source_embeds, curr_source_embeds])
target = torch.cat([curr_pos_target_embeds, curr_neg_target_embeds])
all_edge_types = torch.cat([all_edge_types, all_edge_types])
data.all_edge_types = all_edge_types
if self.decoder_type == "dot":
return data, self.decoder(source, target)
else:
relation = self.relation_weights[all_edge_types]
return data, self.decoder(source, relation, target)
def get_predictions(self, data, embed):
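        """Split the embedding batch into source/target halves, decode, and activate.

        Assumes the batch stacks source-node embeddings in the first half and
        target-node embeddings in the second (even batch size). Sigmoid is used
        for probability-style losses, tanh for max-margin.
        """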
# Apply decoder
source_embed, target_embed = embed.split(embed.size(0) // 2, dim=0)
data, raw_pred = self.decode(data, source_embed, target_embed, data.pos_edge_types)
# Apply activation
if self.loss_type != "max-margin":
pred = torch.sigmoid(raw_pred)
else:
pred = torch.tanh(raw_pred)
return data, raw_pred, pred
def get_link_labels(self, edge_types):
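        """Return edge labels: 1 for the first (positive) half, 0 for the second (negative) half."""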
num_links = edge_types.size(0)
link_labels = torch.zeros(num_links, dtype=torch.float, device=edge_types.device)
        link_labels[:num_links // 2] = 1.
return link_labels
def _step(self, data, dataset_type):
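        """Shared logic for train/val/test steps: batch the data, encode, decode, and compute loss and metrics."""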
        ts = tm = te = time.time()
        if not self.combined_training:
            data = get_batched_data(data, self.all_data)
            tm = time.time()
            data = get_edges(data, self.all_data, dataset_type)
            te = time.time()
data = data.to(device)
# Get predictions
t0 = time.time()
out, gat_attn = self.forward(data.n_id, data.adjs)
t1 = time.time()
data, raw_pred, pred = self.get_predictions(data, out)
t2 = time.time()
# Calculate loss
link_labels = self.get_link_labels(data.all_edge_types)
loss = self.calc_loss(pred, link_labels)
t3 = time.time()
# Calculate metrics
if self.loss_type == "max-margin":
metric_pred = torch.sigmoid(raw_pred)
self.logger.experiment.log({f'{dataset_type}/node_predicted_probs': wandb.Histogram(metric_pred.cpu().detach().numpy())})
else: metric_pred = pred
roc_score, ap_score, acc, f1 = calc_metrics(metric_pred.cpu().detach().numpy(), link_labels.cpu().detach().numpy(), self.pred_threshold)
self.logger.experiment.log({f'{dataset_type}/node_roc_curve': plot_roc_curve(metric_pred.cpu().detach().numpy(), link_labels.cpu().detach().numpy())})
t4 = time.time()
if self.hparams.hp_dict['time']:
print(f'It took {tm-ts:0.2f}s to get batched data, {te-tm:0.2f}s to get edges, {t1-t0:0.2f}s to complete forward pass, {t2-t1:0.2f}s to decode, {t3-t2:0.2f}s to calc loss, and {t4-t3:0.2f}s to calc other metrics.')
return data, loss, pred, link_labels, roc_score, ap_score, acc, f1
def training_step(self, data, data_idx):
data, loss, pred, link_labels, roc_score, ap_score, acc, f1 = self._step(data, 'train')
logs = {"train/node_batch_loss": loss.detach(),
"train/node_roc": roc_score,
"train/node_ap": ap_score,
"train/node_acc": acc,
"train/node_f1": f1
}
rel_logs = metrics_per_rel(pred, link_labels, self.edge_attr_dict, data.all_edge_types, "train", self.pred_threshold)
logs.update(rel_logs)
self._logger(logs)
return {'loss': loss, 'logs': logs}
def training_epoch_end(self, outputs):
roc_train = []
ap_train = []
acc_train = []
f1_train = []
total_train_loss = []
for batch_log in outputs:
roc_train.append(batch_log['logs']["train/node_roc"])
ap_train.append(batch_log['logs']["train/node_ap"])
acc_train.append(batch_log['logs']["train/node_acc"])
f1_train.append(batch_log['logs']["train/node_f1"])
total_train_loss.append(batch_log['logs']["train/node_batch_loss"])
self._logger({"train/node_total_loss": torch.mean(torch.Tensor(total_train_loss)),
"train/node_total_roc": np.mean(roc_train),
"train/node_total_ap": np.mean(ap_train),
"train/node_total_acc": np.mean(acc_train),
"train/node_total_f1": np.mean(f1_train)})
self._logger({'node_curr_epoch': self.current_epoch})
def validation_step(self, data, data_idx):
data, loss, pred, link_labels, roc_score, ap_score, acc, f1 = self._step(data, 'val')
logs = {"val/node_batch_loss": loss.detach().cpu(),
"val/node_roc": roc_score,
"val/node_ap": ap_score,
"val/node_acc": acc,
"val/node_f1": f1
}
rel_logs = metrics_per_rel(pred, link_labels, self.edge_attr_dict, data.all_edge_types, "val", self.pred_threshold)
logs.update(rel_logs)
self._logger(logs)
return logs
def validation_epoch_end(self, outputs):
roc_val = []
ap_val = []
acc_val = []
f1_val = []
total_val_loss = []
for batch_log in outputs:
roc_val.append(batch_log["val/node_roc"])
ap_val.append(batch_log["val/node_ap"])
acc_val.append(batch_log["val/node_acc"])
f1_val.append(batch_log["val/node_f1"])
total_val_loss.append(batch_log["val/node_batch_loss"])
self._logger({"val/node_total_loss": torch.mean(torch.Tensor(total_val_loss)),
"val/node_total_roc": np.mean(roc_val),
"val/node_total_ap": np.mean(ap_val),
"val/node_total_acc": np.mean(acc_val),
"val/node_total_f1": np.mean(f1_val)})
self._logger({'node_curr_epoch': self.current_epoch})
def test_step(self, data, data_idx):
data, loss, pred, link_labels, roc_score, ap_score, acc, f1 = self._step(data, 'test')
logs = {"test/node_batch_loss": loss.detach().cpu(),
"test/node_roc": roc_score,
"test/node_ap": ap_score,
"test/node_acc": acc,
"test/node_f1": f1
}
rel_logs = metrics_per_rel(pred, link_labels, self.edge_attr_dict, data.all_edge_types, "test", self.pred_threshold)
logs.update(rel_logs)
self._logger(logs)
return logs
def test_epoch_end(self, outputs):
roc = []
ap = []
acc = []
f1 = []
for batch_log in outputs:
roc.append(batch_log["test/node_roc"])
ap.append(batch_log["test/node_ap"])
acc.append(batch_log["test/node_acc"])
f1.append(batch_log["test/node_f1"])
self._logger({"test/node_total_roc": np.mean(roc),
"test/node_total_ap": np.mean(ap),
"test/node_total_acc": np.mean(acc),
"test/node_total_f1": np.mean(f1)})
self._logger({'node_curr_epoch': self.current_epoch})
def predict(self, data):
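        """Full-graph inference: embed every node using the complete edge index (no neighbor sampling)."""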
n_id = torch.arange(self.node_emb.weight.shape[0], device=self.device)
x = self.node_emb(n_id)
gat_attn = []
for i in range(len(self.convs)):
# Update node embeddings
            x, (edge_i, alpha) = self.convs[i](x, data.edge_index.to(self.device), return_attention_weights=True)
            edge_i = edge_i.detach().cpu()
            alpha = alpha.detach().cpu()
            # n_id is an arange over all nodes, so this remap is an identity; kept for symmetry with forward()
            n_id_cpu = n_id.cpu()
            edge_i[0,:] = n_id_cpu[edge_i[0,:]]
            edge_i[1,:] = n_id_cpu[edge_i[1,:]]
gat_attn.append((edge_i, alpha))
# Normalize
if i != self.n_layers - 1:
if self.norm_method in ["batch", "layer"]:
x = self.norms[i](x)
elif self.norm_method == "batch_layer":
x = self.layer_norms[i](x)
x = F.leaky_relu(x)
if self.norm_method == "batch_layer":
x = self.batch_norms[i](x)
assert x.shape[0] == self.node_emb.weight.shape[0]
return x, gat_attn
def predict_step(self, data, data_idx):
x, gat_attn = self.predict(data)
return x, gat_attn
def configure_optimizers(self):
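        """Adam with ReduceLROnPlateau on the total validation loss; the current LR is logged as 'curr_lr'."""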
optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay = self.wd)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=self.lr_factor, patience=self.lr_patience, threshold=self.lr_threshold, threshold_mode=self.lr_threshold_mode, cooldown=self.lr_cooldown, min_lr=self.min_lr, eps=self.eps)
return {
"optimizer": optimizer,
"lr_scheduler":
{
"scheduler": scheduler,
"monitor": "val/node_total_loss",
'name': 'curr_lr'
},
}
def _logger(self, logs):
for k, v in logs.items():
self.log(k, v)
def calc_loss(self, pred, y):
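        """Binary cross-entropy for probability outputs, or a margin loss pairing each positive with its sampled negative."""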
if self.loss_type == "BCE":
loss = F.binary_cross_entropy(pred, y, reduction='none')
norm_loss = torch.mean(loss)
elif self.loss_type == "max-margin":
loss = ((1 - (pred[y == 1] - pred[y != 1])).clamp(min=0).mean())
norm_loss = loss
return norm_loss