--- a/main.lua
+++ b/main.lua
@@ -0,0 +1,275 @@
+require 'torch'
+require 'nn'
+require 'optim'
+require 'xlua'
+require 'model'
+include('util/auRoc.lua')
+require 'lfs'
+
+local cmd = torch.CmdLine()
+
+-- Model options
+cmd:option('-init_from', '') -- resume from a previously saved model
+cmd:option('-dropout', 0.5) -- dropout probability
+cmd:option('-cnn', false) -- use a CNN model?
+cmd:option('-rnn', false) -- use an RNN model?
+cmd:option('-rnn_size', 32) -- RNN embedding size
+cmd:option('-rnn_layers', 1) -- number of RNN layers
+cmd:option('-unidirectional', false) -- unidirectional RNN
+cmd:option('-cnn_filters', '9-5-3') -- convolution filter sizes at each layer for the CNN; also determines the number of CNN layers
+cmd:option('-cnn_pool', 2) -- convolution layer pool size
+cmd:option('-cnn_size', 128) -- number of conv feature maps at each layer for CNN and CNN-RNN
+
+-- Optimization options
+cmd:option('-max_epochs', 50) -- number of epochs to train for
+cmd:option('-learning_rate', 1e-2)
+cmd:option('-grad_clip', 5) -- gradient clip value magnitude (0 disables clipping)
+cmd:option('-lr_decay_every', 10) -- decay the learning rate every N epochs
+cmd:option('-lr_decay_factor', 0.5) -- multiplicative learning rate decay factor
+
+-- GPU
+cmd:option('-gpu', 1) -- set to 0 for CPU mode
+
+-- Dataset options
+cmd:option('-data_root', 'data') -- data root directory
+cmd:option('-dataset', 'deepbind') -- dataset
+cmd:option('-seq_length', 101) -- length of DNA sequences
+cmd:option('-TF', 'ATF1_K562_ATF1_-06-325-_Harvard') -- change for a different TF
+cmd:option('-alphabet', 'ACGT')
+cmd:option('-size', 0) -- how much of each dataset to load; 0 = full
+cmd:option('-batch_size', 256)
+cmd:option('-class_labels', '1,0') -- specify the positive label first
+
+-- Other
+cmd:option('-noprogressbar', false) -- disable the lua progress bar
+cmd:option('-name', '') -- optional extra tag for the model name (optional)
+cmd:option('-checkpoint_every', 0) -- save a model checkpoint every N epochs (0 = never)
+
+-- Directory to save models to
+cmd:option('-save_dir', 'models/')
+
+local opt = cmd:parse(arg)
+
+opt.class_labels_table = opt.class_labels:split(',')
+opt.num_classes = #opt.class_labels_table
+opt.alphabet_size = #opt.alphabet
+
+local data_dir = opt.data_root..'/'..opt.dataset..'/'
+
+-- Name of the directory to save the models to
+if opt.cnn and (not opt.rnn) then -- CNN
+  model_name = 'model=CNN,cnn_size='..opt.cnn_size..',cnn_filters='..opt.cnn_filters
+elseif opt.cnn and opt.rnn then -- CNN-RNN
+  model_name = 'model=CNN-RNN,cnn_size='..opt.cnn_size..',cnn_filter='..opt.cnn_filters:split('-')[1]..',rnn_size='..opt.rnn_size..',rnn_layers='..opt.rnn_layers
+  if opt.unidirectional then model_name = model_name..',unidirectional' end
+elseif (not opt.cnn) and opt.rnn then -- RNN
+  model_name = 'model=RNN,rnn_size='..opt.rnn_size..',rnn_layers='..opt.rnn_layers
+  if opt.unidirectional then model_name = model_name..',unidirectional' end
+else
+  print('Need either the -cnn or -rnn flag! Exiting')
+  os.exit()
+end
+model_name = model_name..',dropout='..opt.dropout..',learning_rate='..opt.learning_rate..',batch_size='..opt.batch_size
+if opt.name ~= '' then model_name = model_name..','..opt.name end
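+
+-- For reference, the defaults above plus -cnn -rnn produce the directory name:
+--   model=CNN-RNN,cnn_size=128,cnn_filter=9,rnn_size=32,rnn_layers=1,dropout=0.5,learning_rate=0.01,batch_size=256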
+
+-- Set up GPU stuff
+local dtype = 'torch.FloatTensor'
+if opt.gpu > 0 then
+  collectgarbage()
+  require 'cutorch'
+  require 'cunn'
+  cutorch.setDevice(opt.gpu)
+  dtype = 'torch.CudaTensor'
+  print(string.format('Running with CUDA on GPU %d', opt.gpu))
+else
+  print 'Running in CPU mode'
+end
+
+-- check if a file exists
+function file_exists(name)
+  local f = io.open(name, 'r')
+  if f ~= nil then io.close(f) return true else return false end
+end
+
+-- *Uncomment the for loop and if statement to loop through all TFs and run train/test*
+-- for TF in lfs.dir(data_dir) do
+-- if TF:sub(1,1) ~= '.' and TF:sub(1,3) ~= 'ALL' then
+
+  flag = true -- set to false if training diverges, so nothing gets saved
+  opt.TF = TF or opt.TF -- TF is only defined when the loop above is uncommented
+  opt.data_dir = data_dir..opt.TF
+  print('------TF DATASET-------\n'..opt.data_dir..'\n')
+
+  local save_file = opt.save_dir..model_name..'/'..opt.TF..'/log.t7'
+  print('------MODEL LOCATION-------\n'..opt.save_dir..model_name..'\n')
+  if file_exists(save_file) and opt.init_from == '' then
+    print('Already trained! Skipping')
+  else
+    local log = {}
+    log['train'] = {}
+    log['test'] = {}
+    log['best_epoch'] = 0
+    log['train_auc'] = 0
+    log['test_auc'] = 0
+
+    -- create the output directories up front so mid-run checkpoints can be saved
+    lfs.mkdir(opt.save_dir..model_name)
+    lfs.mkdir(opt.save_dir..model_name..'/'..opt.TF)
+
+    -- ============ Load Data =========== --
+    require('data')
+    data = {}
+    data.train = createDatasetOneHot("train", opt)
+    data.test = createDatasetOneHot("test", opt)
+    train_size = data.train.inputs:size()
+    test_size = data.test.inputs:size()
+
+    -- ====== Initialize the model and criterion ======= --
+    local model = nil
+    if opt.init_from ~= '' then -- load a saved model
+      print('Initializing from ', opt.init_from)
+      model = torch.load(opt.init_from):type(dtype)
+    else -- create a new model
+      local opt_clone = torch.deserialize(torch.serialize(opt)) -- deep copy of opt
+      print(opt_clone)
+      model = nn.Model(opt_clone):type(dtype)
+    end
+
+    local params, grad_params = model:getParameters()
+    local crit = nn.ClassNLLCriterion():type(dtype)
+    local optim_config = {learningRate = opt.learning_rate} -- adam ignores a momentum field, so only the learning rate is set
+    local AUC = auRoc:new()
+
+    -- ========== Run Train/Test ============ --
+    local best_trainAUROC = 0
+    for epoch = 1, opt.max_epochs do
+      -- ========== Training ============ --
+      print('======> Training epoch '..epoch)
+
+      model:resetStates()
+      model:training()
+      for t = 1, train_size, opt.batch_size do
+        if not opt.noprogressbar then
+          xlua.progress(t, train_size)
+        end
+        model:resetStates()
+        -- Loss function that we pass to an optim method
+        local function f(w)
+          assert(w == params)
+          grad_params:zero()
+
+          -- Get a minibatch
+          x = data.train.inputs[t]:type(dtype)
+          y = data.train.labels[t]:type(dtype)
+
+          -- forward model
+          local scores = model:forward(x)
+
+          -- add scores and labels to the AUC accumulator; column 1 holds the
+          -- positive-class score for every example in the batch
+          auc_in = scores[{{1,scores:size(1)},{1,1}}]:reshape(scores:size(1))
+          for i = 1, auc_in:size(1) do AUC:add(math.exp(auc_in[i]), y[i]) end
+
+          -- forward/backward criterion
+          local loss = crit:forward(scores, y)
+          local grad_scores = crit:backward(scores, y):view(x:size(1), 2, -1):reshape(x:size(1), 2)
+
+          -- backward model
+          model:backward(x, grad_scores)
+
+          -- clip gradients
+          if opt.grad_clip > 0 then
+            grad_params:clamp(-opt.grad_clip, opt.grad_clip)
+          end
+
+          return loss, grad_params
+        end
+        local _, loss = optim.adam(f, params, optim_config)
+      end
+      local trainAUROC = AUC:calculateAuc()
+      print('\nTrain AUC: '..trainAUROC)
+      AUC:zero()
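+
+      -- Note: nn.ClassNLLCriterion expects log-probabilities, so model.lua's
+      -- output layer is assumed to end in a LogSoftMax; math.exp() above turns
+      -- the positive-class log-probability back into a probability before it
+      -- is handed to auRoc. The test loop below follows the same convention.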
+
+      -- =========== Testing ============ --
+      print('======> Testing epoch '..epoch)
+      model:resetStates()
+      model:evaluate()
+      for t = 1, test_size, opt.batch_size do
+        -- progress bar
+        if not opt.noprogressbar then
+          xlua.progress(t, test_size)
+        end
+
+        -- get data
+        x = data.test.inputs[t]:type(dtype)
+        y = data.test.labels[t]:type(dtype)
+
+        -- forward model
+        model:resetStates()
+        local scores = model:forward(x)
+
+        -- add scores and labels to AUC
+        auc_in = scores[{{1,scores:size(1)},{1,1}}]:reshape(scores:size(1))
+        for i = 1, auc_in:size(1) do AUC:add(math.exp(auc_in[i]), y[i]) end
+      end
+      local testAUROC = AUC:calculateAuc()
+      AUC:zero()
+      print('\nTest AUC: '..testAUROC)
+
+      -- ======== Checkpoint and Log ========== --
+
+      -- a test AUC this low means the run diverged; bail out on this TF
+      if testAUROC < 0.1 then
+        flag = false
+        print('error in training, break from current TF')
+        break
+      end
+
+      -- log the best AUC results and remember the model; a plain clone()
+      -- snapshots the parameters (clone('weight','bias') would share them
+      -- with the live model, so the snapshot would keep training)
+      if epoch > 1 and (trainAUROC > best_trainAUROC) then
+        best_trainAUROC = trainAUROC
+        best_model = model:clone()
+        log['best_epoch'] = epoch
+        log['train_auc'] = trainAUROC
+        log['test_auc'] = testAUROC
+      end
+
+      -- Save periodic checkpoints (guard against the default of 0, which
+      -- would otherwise make epoch % 0 evaluate to nan)
+      if opt.checkpoint_every > 0 and epoch % opt.checkpoint_every == 0 then
+        torch.save(lfs.currentdir()..'/'..opt.save_dir..model_name..'/'..opt.TF..'/epoch'..epoch..'_model.t7', model)
+      end
+
+      -- decay learning rate
+      if epoch % opt.lr_decay_every == 0 then
+        local old_lr = optim_config.learningRate
+        optim_config.learningRate = old_lr * opt.lr_decay_factor
+      end
+
+      -- Log every epoch (we don't save every model because of space)
+      table.insert(log['test'], testAUROC)
+      table.insert(log['train'], trainAUROC)
+      collectgarbage()
+    end
+
+    -- ======== Save the Best Model and Log ========== --
+    if flag then
+      -- save the best-epoch snapshot, falling back to the final model if no
+      -- epoch after the first improved on the train AUC
+      torch.save(lfs.currentdir()..'/'..opt.save_dir..model_name..'/'..opt.TF..'/best_model.t7', best_model or model)
+      torch.save(opt.save_dir..model_name..'/'..opt.TF..'/log.t7', log)
+    end
+  end
+
+-- end -- if directory
+-- end -- loop through TFs
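
Example invocations (assuming model.lua, data.lua, and util/auRoc.lua from this repo are on the Lua path, and that data/deepbind/<TF>/ holds the train/test sets consumed by createDatasetOneHot):

  th main.lua -cnn -gpu 1                     (CNN only)
  th main.lua -cnn -rnn -rnn_size 64          (CNN-RNN hybrid)
  th main.lua -rnn -unidirectional -gpu 0     (unidirectional RNN on CPU)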