--- a
+++ b/HINT/module.py
@@ -0,0 +1,120 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from copy import deepcopy 
+from torch.autograd import Variable
+from torch.utils import data
+from torch.utils.data import SequentialSampler
+import matplotlib.pyplot as plt
+import numpy as np 
+sigmoid = torch.nn.Sigmoid()  # shared Sigmoid module instance; not referenced by the classes in this file
+torch.manual_seed(0)  # seeds torch's global RNG before HINT.gnn_layers is imported
+
+from HINT.gnn_layers import GraphConvolution, GraphAttention
+torch.manual_seed(4)  # re-seeds torch after the import; this is the torch seed in effect once the module has loaded
+np.random.seed(1)  # seeds NumPy's global RNG as well (an import-time side effect for every importer)
+
+class Highway(nn.Module):
+    """Stack of highway layers: a sigmoid gate blends a ReLU branch with an affine branch.
+
+    :param size: width of the input features and of every internal nn.Linear
+    :param num_layers: number of highway layers applied in sequence (0 returns x as is)
+    """
+
+    def __init__(self, size, num_layers):
+        super().__init__()
+        self.num_layers = num_layers
+        self.nonlinear = nn.ModuleList([nn.Linear(size, size) for _ in range(num_layers)])
+        self.linear = nn.ModuleList([nn.Linear(size, size) for _ in range(num_layers)])
+        self.gate = nn.ModuleList([nn.Linear(size, size) for _ in range(num_layers)])
+        self.f = F.relu
+
+    def forward(self, x):
+        """Apply x <- g * f(G(x)) + (1 - g) * Q(x) per layer, with g = sigmoid(T(x)).
+
+        :param x: tensor with shape of [batch_size, size]
+        :return: tensor with shape of [batch_size, size]
+        """
+        for layer in range(self.num_layers):
+            # torch.sigmoid is used because PyTorch flags F.sigmoid as deprecated.
+            gate = torch.sigmoid(self.gate[layer](x))
+            transformed = self.f(self.nonlinear[layer](x))
+            x = gate * transformed + (1 - gate) * self.linear[layer](x)
+        return x
+
+
+class GCN(nn.Module):
+    """Two-layer graph convolutional network; forward() returns gc2's output with no softmax."""
+    def __init__(self, nfeat, nhid, nclass, dropout, init):
+        super().__init__()
+        self.gc1 = GraphConvolution(nfeat, nhid, init=init)
+        self.gc2 = GraphConvolution(nhid, nclass, init=init)
+        self.dropout = dropout  # drop probability handed to F.dropout in forward()
+
+    def bottleneck(self, path1, path2, path3, adj, in_x):
+        """Chain path1 -> path2 -> path3, calling each as layer(h, adj) with a ReLU after it."""
+        out = F.relu(path1(in_x, adj))
+        out = F.relu(path2(out, adj))
+        return F.relu(path3(out, adj))
+
+    def forward(self, x, adj):
+        """Compute gc2(dropout(relu(gc1(x, adj))), adj); dropout only acts in training mode."""
+        activated = F.relu(self.gc1(x, adj))
+        return self.gc2(F.dropout(activated, self.dropout, training=self.training), adj)
+
+
+class GCN_drop_in(nn.Module):
+    """GCN variant: dropout on the input as well as the hidden layer, log-softmax on the output."""
+    def __init__(self, nfeat, nhid, nclass, dropout, init):
+        super().__init__()
+        self.gc1 = GraphConvolution(nfeat, nhid, init=init)
+        self.gc2 = GraphConvolution(nhid, nclass, init=init)
+        self.dropout = dropout  # one drop probability shared by both F.dropout calls
+
+    def bottleneck(self, path1, path2, path3, adj, in_x):
+        """Apply path1, path2, path3 in turn as layer(h, adj), with a ReLU after each."""
+        inner = F.relu(path2(F.relu(path1(in_x, adj)), adj))
+        return F.relu(path3(inner, adj))
+
+    def forward(self, x, adj):
+        """Return log_softmax(gc2(h, adj), dim=1), h = dropout(relu(gc1(dropout(x), adj)))."""
+        hidden = F.relu(self.gc1(F.dropout(x, self.dropout, training=self.training), adj))
+        return F.log_softmax(self.gc2(F.dropout(hidden, self.dropout, training=self.training), adj), dim=1)
+
+class GAT(nn.Module):
+    """Multi-head graph attention network; forward() returns log-softmax scores over dim 1."""
+    def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads):
+        super().__init__()
+        self.dropout = dropout
+        # Heads are kept in a plain list, so each one is also registered by name as a sub-module.
+        self.attentions = []
+        for i in range(nheads):
+            head = GraphAttention(nfeat, nhid, dropout=dropout, alpha=alpha, concat=True)
+            self.add_module('attention_{}'.format(i), head)
+            self.attentions.append(head)
+        self.out_att = GraphAttention(nhid * nheads, nclass, dropout=dropout, alpha=alpha, concat=False)
+
+    def forward(self, x, adj):
+        """Concatenate the head outputs along dim 1, then apply out_att, ELU and log-softmax."""
+        h = F.dropout(x, self.dropout, training=self.training)
+        h = torch.cat([head(h, adj) for head in self.attentions], dim=1)
+        h = F.dropout(h, self.dropout, training=self.training)
+        return F.log_softmax(F.elu(self.out_att(h, adj)), dim=1)
+
+
+if __name__ == "__main__":
+    # Smoke check when the file is run as a script: building a small GCN
+    # exercises the constructor only; no forward pass is executed.
+    gnn = GCN(
+        nfeat=20, nhid=30, nclass=1,
+        dropout=0.6, init='uniform',
+    )
+
+
+
+
+
+
+
+
+