--- a
+++ b/neuroqwerty-mit-csxpd-dataset-1.0.0/nqDataLoader.py
@@ -0,0 +1,244 @@
+# -*- coding: utf-8 -*-
+
+# set modules  dir
+import numpy as np
+import sys, os, re, datetime
+
+
+class NqDataLoader:
+    FLT_NO_MOUSE = 1 << 0
+    FLT_NO_LETTERS = 1 << 1
+    FLT_NO_BACK = 1 << 2
+    FLT_NO_SHORT_META = 1 << 3    # space, enter, arrows, etc.
+    FLT_NO_LONG_META = 1 << 4 # shift, control, alt, ect.
+    FLT_NO_PUNCT = 1 << 5
+    
+    def __init__(self):
+        self.dataKeys = None
+        self.dataHT = None
+        self.dataTimeStart = None
+        self.dataTimeEnd = None
+        pass
+    
+
+    def sanityCheck( self ):
+        """
+        Filter out keystrokes variables in the member variables. 
+        Eliminate anything < 0.
+        returns the number of elements removed
+        """
+        assert( self.dataKeys is not None and len(self.dataKeys) > 0 )
+        assert( self.dataHT is not None and len(self.dataHT) > 0 )
+        assert( self.dataTimeStart is not None and len(self.dataTimeStart) > 0 )
+        assert( self.dataTimeEnd is not None and len(self.dataTimeEnd) > 0 )
+        
+        badLbl = self.dataTimeStart <= 0
+        badLbl = np.bitwise_or( badLbl,  self.dataTimeEnd <= 0)
+        badLbl = np.bitwise_or( badLbl,  self.dataHT < 0)
+        badLbl = np.bitwise_or( badLbl,  self.dataHT >= 5)
+        #----- remove non consecutive start times
+        nonConsTmpLbl = np.zeros( len(self.dataTimeStart) ) == 0 # start with all True labels
+        nonConsLbl = np.zeros( len(self.dataTimeStart) ) > 0 # start with all False labels
+        startTmpArr = self.dataTimeStart.copy()
+        while ( np.sum( nonConsTmpLbl ) > 0 ):
+			# find non consecutive labels
+			nonConsTmpLbl = np.append([False], np.diff(startTmpArr)<0)                
+   			# keep track of the indeces to remove
+			nonConsLbl = np.bitwise_or( nonConsLbl,  nonConsTmpLbl)
+               # changes value in the temporary array
+			indecesToChange = np.arange(len(nonConsTmpLbl))[nonConsTmpLbl]
+			startTmpArr[indecesToChange] = startTmpArr[indecesToChange-1]
+
+        badLbl = np.bitwise_or( badLbl,  nonConsLbl)
+        #-----
+		
+        # invert bad labels
+        goodLbl = np.bitwise_not(badLbl)
+        
+        self.dataKeys = self.dataKeys[goodLbl]
+        self.dataHT = self.dataHT[goodLbl]
+        self.dataTimeStart = self.dataTimeStart[goodLbl]
+        self.dataTimeEnd = self.dataTimeEnd[goodLbl]
+             
+        
+        return sum(badLbl)
+    
+    def loadDataFile(self, fileIn, autoFilt=True, impType=None, debug=False):  
+        """
+        Load raw data file
+        """      
+        errorStr = ''
+        try:
+            data = []
+            
+#            if data.dtype == np.int64: # Sleep inertia format
+            if impType =='si':
+                data = np.genfromtxt(fileIn, dtype=long, delimiter=',', skip_header=0)
+                data = data - data.min()
+                data = data.astype(np.float64) / 1000
+                self.dataTimeStart = data[:,0]  
+                self.dataTimeEnd = data[:,1]
+                self.dataHT = self.dataTimeEnd - self.dataTimeStart
+                #TO REMOVE
+                self.dataKeys = np.zeros(len(self.dataHT))#Just to make sanity work
+                remNum = self.sanityCheck()
+                #print remNum
+            else: # PD format
+                data = np.genfromtxt(fileIn, dtype=None, delimiter=',', skip_header=0)
+                # load
+                self.dataKeys = data['f0']
+                self.dataHT = data['f1']  
+                self.dataTimeStart = data['f3']  #No CHANGED 2<->3
+                self.dataTimeEnd = data['f2']
+                remNum = self.sanityCheck()
+                #print '{:}, {:} %'.format( remNum, 1.0*remNum/len(self.dataHT) )
+                
+                if (debug):
+                    print 'removed ', str(remNum), ' elements'
+
+                if( autoFilt ):
+                    self.filtData(self.FLT_NO_MOUSE  | self.FLT_NO_LONG_META )
+            
+            # load flight time
+            self.dataFT = np.array([ self.dataTimeStart[i]-self.dataTimeStart[i-1]  for i in range(1,self.dataTimeStart.size) ])
+            self.dataFT = np.append(self.dataFT, 0)
+            
+            
+            
+            return True
+        except IOError:
+            errorStr = 'file {:s} not found'.format(fileIn)
+            return errorStr
+    def loadDataArr(self, lstArr):
+        self.dataKeys = np.zeros((len(lstArr),1), dtype='S30')
+        self.dataHT = np.zeros((len(lstArr),1))
+        self.dataTimeStart = np.zeros((len(lstArr),1))  
+        self.dataTimeEnd =np.zeros((len(lstArr),1))
+        i = 0
+        for row in lstArr:
+            tok = row.split(',')
+            self.dataKeys[i] = str(tok[0])
+            self.dataHT[i] = str(tok[1])
+            self.dataTimeStart[i] = str(tok[2])
+            self.dataTimeEnd[i] = str(tok[3]) 
+            i += 1
+            
+        #self.loadDataFile(lstArr.toString())
+    
+
+    def filtData(self, flags):
+        """
+        Filter data
+        return (fltKeys, fltHT, fltTimeStart, fltTimeEnd)
+        """
+        #-- filters
+        pMouse=re.compile('("mouse.+")')
+        pChar=re.compile('(".{1}")')
+        pBack=re.compile('("BackSpace")')
+        pLongMeta=re.compile('("Shift.+")|("Alt.+")|("Control.+")')
+        pShortMeta=re.compile('("space")|("Num_Lock")|("Return")|("P_Enter")|("Caps_Lock")|("Left")|("Right")|("Up")|("Down")')
+        pPunct=re.compile('("more")|("less")|("exclamdown")|("comma")|("\[65027\]")|("\[65105\]")|("ntilde")|("minus")|("equal")|("bracketleft")|("bracketright")|("semicolon")|("backslash")|("apostrophe")|("comma")|("period")|("slash")|("grave")')
+        #--
+
+        #-- create mask labels        
+        lbl = np.ones(len( self.dataKeys ))==1
+        if( flags & self.FLT_NO_MOUSE ):
+            lblTmp = [ pMouse.match( k ) is None for k in self.dataKeys]
+            lbl = lbl & lblTmp
+        if( flags & self.FLT_NO_LETTERS ):
+            lblTmp = [ pChar.match( k ) is None for k in self.dataKeys]
+            lbl = lbl & lblTmp
+        if( flags & self.FLT_NO_BACK ):
+            lblTmp = [ pBack.match( k ) is None for k in self.dataKeys]
+            lbl = lbl & lblTmp
+        if( flags & self.FLT_NO_SHORT_META ):
+            lblTmp = [ pShortMeta.match( k ) is None for k in self.dataKeys]
+            lbl = lbl & lblTmp
+        if( flags & self.FLT_NO_LONG_META ):
+            lblTmp = [ pLongMeta.match( k ) is None for k in self.dataKeys]
+            lbl = lbl & lblTmp
+        if( flags & self.FLT_NO_PUNCT ):
+            lblTmp = [ pPunct.match( k ) is None for k in self.dataKeys]
+            lbl = lbl & lblTmp
+        #--
+        
+        self.lbl = lbl        
+        
+        self.dataKeys = self.dataKeys[lbl]
+        self.dataHT = self.dataHT[lbl]
+        self.dataTimeStart = self.dataTimeStart[lbl]
+        self.dataTimeEnd = self.dataTimeEnd[lbl]        
+        
+    def getStdVariablesFilt( fileIn, impType=None ):
+        """
+        Receives as parameter the location of the raw typing file
+        Return filtered variables (i.e. no mouse clicks, no long meta buttons, no backspaces) 
+        format returned (array of keys, array of hold times, array of press events timestamps, array of release events timestamps )
+        """
+        nqObj = self
+        res = nqObj.loadDataFile( fileIn, False, impType)
+        # remove delete button
+        nqObj.filtData(nqObj.FLT_NO_MOUSE  | nqObj.FLT_NO_LONG_META | nqObj.FLT_NO_BACK )
+        assert(res==True) # make sure the file exists
+        dataKeys = nqObj.dataKeys
+        dataHT = nqObj.dataHT
+        dataTimeStart = nqObj.dataTimeStart
+        dataTimeEnd = nqObj.dataTimeEnd
+        
+        return dataKeys, dataHT, dataTimeStart, dataTimeEnd
+
+
+def getDataFiltHelper( fileIn, impType=None ):
+    """
+    Helper method to load filtered keypress data from given file
+    :param fileIn: path to csv keypress file 
+    :param impType: format of the csv file ('si': for sleep inertia data, None for PD data)
+    :return: list of array with dataKeys, dataHT, dataTimeStart, dataTimeEnd
+    """
+    nqObj = NqDataLoader()
+    res = nqObj.loadDataFile( fileIn, False, impType)
+    # remove delete button
+    nqObj.filtData(nqObj.FLT_NO_MOUSE  | nqObj.FLT_NO_LONG_META | nqObj.FLT_NO_BACK )
+    assert(res==True) # make sure the file exists
+    dataKeys = nqObj.dataKeys
+    dataHT = nqObj.dataHT
+    dataTimeStart = nqObj.dataTimeStart
+    dataTimeEnd = nqObj.dataTimeEnd
+    
+    return dataKeys, dataHT, dataTimeStart, dataTimeEnd
+    
+    
+def genFileStruct( dataDir, maxRepNum=4 ):
+    '''
+    Generate a dictionary with the NQ file list and test date (legacy method)
+    :param dataDir: base directory containing the CSV files
+    :param maxRepNum: integer with the maximum repetition number
+    :return: two dictionaries: fMap, dateMap = NQ file/date list[pID][repID][expID]
+    '''
+    fMap = {} # data container
+    dateMap = {}
+    files = os.listdir( dataDir )    
+    p = re.compile( '([0-9]+)\.{1}([0-9]+)_([0-9]+)_([0-9]+)\.csv' )
+    for f in files:
+        m = p.match( f )
+        
+        if( m ): # file found
+            timeStamp = m.group(1)
+            pID = int(m.group(2))
+            repID = int(m.group(3))
+            expID = int(m.group(4))
+            # store new patient
+            if( not fMap.has_key(pID) ):
+                fMap[pID] = {}
+                dateMap[pID] = {}
+                for tmpRid in range(1, maxRepNum+1):
+                    fMap[pID][tmpRid] = {}
+                    dateMap[pID][tmpRid] = {}
+                # fMap[pID] = {1: {}, 2: {}, 3: {}, 4:{}}
+            # store data
+            fMap[pID][repID][expID] = dataDir + f
+            dateMap[pID][repID][expID] = datetime.datetime.fromtimestamp(int(timeStamp))
+        else:
+            print f, ' no'
+            
+    return fMap, dateMap