[9fab9f]: / modules / StatManip / SMManager.py

Download this file

56 lines (48 with data), 2.2 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Computes the tf-idf values of the tokenCancer matrix
import pandas as pd
import scipy as sp
import numpy as np
import math
import sys
sys.path.insert(0, "/home/skjena/cancerTherapy/modules")
from Status.Status import Status
class SMManager:
    """Statistical-manipulation helpers for the token/cancer matrix.

    Each labelled step reports to ``self.status`` by calling
    ``message(1, <signature>)`` before the work and ``message(0, <signature>)``
    after it.
    NOTE(review): the meaning of the 1/0 codes is defined by the Status
    class, which is not visible here -- presumably start/finish markers.
    """

    # Default location of the token/cancer matrix read by calctf_idf().
    TOKEN_CANCER_CSV = "/home/skjena/cancerTherapy/modules/RawDB/scripts/tokenCancer.csv"

    def __init__(self):
        self.status = Status("SMManager")

    def isNumber(self, s):
        """Return True if ``float(s)`` succeeds, False otherwise.

        Values that ``float`` rejects with a TypeError (``None``, lists, ...)
        are reported as not-a-number instead of propagating the exception.
        """
        try:
            float(s)
        except (TypeError, ValueError):
            return False
        return True

    def calctf_idf(self, csv_path=None):
        """Compute the tf-idf matrix of the token/cancer CSV.

        Every row of the CSV is treated as one document and every column
        other than ``CANCERS`` as one term holding a numeric count.

        * tf(d, t)  = count(d, t) / sum of all counts in row d
          (rows whose counts sum to zero get tf = 0)
        * idf(t)    = log(N / (1 + number of rows where count(t) > 0)),
          with N the number of rows
        * result    = tf * idf

        Parameters
        ----------
        csv_path : str or file-like, optional
            Anything accepted by ``pandas.read_csv``. Defaults to
            ``SMManager.TOKEN_CANCER_CSV``.

        Returns
        -------
        pandas.DataFrame
            One row per CSV row and one column per term; columns are in the
            sorted order produced by ``Index.difference``.
        """
        source = self.TOKEN_CANCER_CSV if csv_path is None else csv_path
        df = pd.read_csv(source)

        # Drop the label column; everything left is a term count.
        counts = df[df.columns.difference(['CANCERS'])].astype(float)
        if counts.empty:
            # No rows or no term columns: nothing to weight, and log(0)
            # below would only produce -inf noise.
            return counts

        # Normalise each row by its own total. A zero total is swapped for 1
        # so an all-zero row yields 0 rather than NaN.
        documentLengths = counts.sum(axis=1)
        safeLengths = documentLengths.where(documentLengths != 0, 1.0)
        termFrequency = counts.div(safeLengths, axis=0)

        numberOfDocuments = len(counts)
        documentFrequency = (counts > 0).sum(axis=0)
        # The parentheses matter: the ratio is N / (1 + df), not N/1 + df.
        inverseDocumentFrequency = np.log(
            numberOfDocuments / (1.0 + documentFrequency)
        )

        # Broadcast the per-term idf across every row.
        return termFrequency.mul(inverseDocumentFrequency, axis=1)

    def skewLabels(self, dataframe, problemType):
        """Build a column vector of 0/1 labels from the dataframe's index.

        For ``problemType == "0"`` each row label is converted to a string
        and mapped to 1 when it starts with the lowercase letter ``'l'`` and
        to 0 otherwise.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Only its index is read.
        problemType : str
            Only ``"0"`` is supported.

        Returns
        -------
        numpy.ndarray
            Integer array of shape ``(len(dataframe.index), 1)``.

        Raises
        ------
        ValueError
            If ``problemType`` is anything other than ``"0"``.
        """
        self.status.message(1, "skewLabels(self, dataframe, problemType)")
        if problemType != "0":
            raise ValueError(
                "skewLabels: unsupported problemType %r (only \"0\" is implemented)"
                % (problemType,)
            )
        # dtype=str keeps np.char usable for empty or non-string indexes;
        # reshape(-1, 1) adapts to however many rows there are.
        labels = np.array(list(dataframe.index), dtype=str).reshape(-1, 1)
        intlabels = np.char.startswith(labels, 'l').astype(int)
        self.status.message(0, "skewLabels(self, dataframe, problemType)")
        return intlabels

    def adjoin(self, dimensions, labels):
        """Return ``dimensions`` and ``labels`` stacked side by side.

        Both arguments are passed to ``numpy.hstack`` as given.
        """
        self.status.message(1, "adjoin(self, dimensions, labels)")
        fullset = np.hstack((dimensions, labels))
        self.status.message(0, "adjoin(self, dimensions, labels)")
        return fullset

    def writeToFile(self, fullset, filename):
        """Write ``fullset`` to ``filename`` as comma-separated text.

        ``filename`` is handed straight to ``numpy.savetxt``. Returns None.
        """
        self.status.message(1, "writeToFile(self, fullset)")
        np.savetxt(filename, fullset, delimiter=',')
        self.status.message(0, "writeToFile(self, fullset)")
        return