Switch to side-by-side view

--- a
+++ b/src/.ipynb_checkpoints/evals-checkpoint.py
@@ -0,0 +1,146 @@
+"""
+Author: Ritambhara Singh, Pinar Demetci, Rebecca Santorella
+19 February 2020
+"""
+import numpy as np
+import random, math, os, sys
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import normalize
+from sklearn.metrics import roc_auc_score, silhouette_samples
+from sklearn.decomposition import PCA
+
+def calc_frac_idx(x1_mat,x2_mat):
+	"""
+	Returns fraction closer than true match for each sample (as an array)
+	"""
+	fracs = []
+	x = []
+	nsamp = x1_mat.shape[0]
+	rank=0
+	for row_idx in range(nsamp):
+		euc_dist = np.sqrt(np.sum(np.square(np.subtract(x1_mat[row_idx,:], x2_mat)), axis=1))
+		true_nbr = euc_dist[row_idx]
+		sort_euc_dist = sorted(euc_dist)
+		rank =sort_euc_dist.index(true_nbr)
+		frac = float(rank)/(nsamp -1)
+
+		fracs.append(frac)
+		x.append(row_idx+1)
+
+	return fracs,x
+
+def calc_domainAveraged_FOSCTTM(x1_mat, x2_mat):
+	"""
+	Outputs average FOSCTTM measure (averaged over both domains)
+	Get the fraction matched for all data points in both directions
+	Averages the fractions in both directions for each data point
+	"""
+	fracs1,xs = calc_frac_idx(x1_mat, x2_mat)
+	fracs2,xs = calc_frac_idx(x2_mat, x1_mat)
+	fracs = []
+	for i in range(len(fracs1)):
+		fracs.append((fracs1[i]+fracs2[i])/2)  
+	return fracs
+
+def calc_sil(x1_mat,x2_mat,x1_lab,x2_lab):
+	"""
+	Returns silhouette score for datasets with cell clusters
+	"""
+	sil = []
+	sil_d0 = []
+	sil_d3 = []
+	sil_d7 = []
+	sil_d11 = []
+	sil_npc = []
+
+	x = np.concatenate((x1_mat,x2_mat))
+	lab = np.concatenate((x1_lab,x2_lab))
+
+	sil_score = silhouette_samples(x,lab)
+
+	nsamp = x.shape[0]
+	for i in range(nsamp):
+		if(lab[i]==1):
+			sil_d0.append(sil_score[i])
+		elif(lab[i]==2):
+			sil_d3.append(sil_score[i])
+		elif(lab[i]==3):
+			sil_d7.append(sil_score[i])
+		elif(lab[i]==4):
+			sil_d11.append(sil_score[i])
+		elif(lab[i]==5):
+			sil_npc.append(sil_score[i])
+
+	avg = np.mean(sil_score)
+	d0 = sum(sil_d0)/len(sil_d0)
+	d3 = sum(sil_d3)/len(sil_d3)
+	d7 = sum(sil_d7)/len(sil_d7)
+	d11 = sum(sil_d11)/len(sil_d11)
+	npc = sum(sil_npc)/len(sil_npc)
+	
+	return avg,d0,d3,d7,d11,npc
+
+def binarize_labels(label,x):
+	"""
+	Helper function for calc_auc
+	"""
+	bin_lab = np.array([1] * len(x))
+	idx = np.where(x == label)
+	
+	bin_lab[idx] = 0
+	return bin_lab
+	
+def calc_auc(x1_mat, x2_mat, x1_lab, x2_lab):
+	"""
+	calculate avg. ROC AUC scores for transformed data when there are >=2 number of clusters.
+	"""
+	nsamp = x1_mat.shape[0]
+	
+	auc = []
+	auc_d0 = []
+	auc_d3 = []
+	auc_d7 = []
+	auc_d11 = []
+	auc_npc = []
+	
+	for row_idx in range(nsamp):
+		euc_dist = np.sqrt(np.sum(np.square(np.subtract(x1_mat[row_idx,:], x2_mat)), axis=1))
+		y_scores = euc_dist
+		y_true = binarize_labels(x1_lab[row_idx],x2_lab)
+				
+		auc_score = roc_auc_score(y_true, y_scores)
+		auc.append(auc_score)
+	
+		if(x1_lab[row_idx]==0):
+			auc_d0.append(auc_score)
+		elif(x1_lab[row_idx]==1):
+			auc_d3.append(auc_score)
+		elif(x1_lab[row_idx]==2):
+			auc_d7.append(auc_score)
+		elif(x1_lab[row_idx]==3):
+			auc_d11.append(auc_score)
+		elif(x1_lab[row_idx]==4):
+			auc_npc.append(auc_score)
+		
+	avg = sum(auc)/len(auc)
+	d0 = sum(auc_d0)/len(auc_d0)
+	d3 = sum(auc_d3)/len(auc_d3)
+	d7 = sum(auc_d7)/len(auc_d7)
+	d11 = sum(auc_d11)/len(auc_d11)
+	npc = sum(auc_npc)/len(auc_npc)
+	
+	return avg,d0,d3,d7,d11,npc
+
+def transfer_accuracy(domain1, domain2, type1, type2, n):
+	"""
+	Metric from UnionCom: "Label Transfer Accuracy"
+	"""
+	knn = KNeighborsClassifier(n_neighbors=n)
+	knn.fit(domain2, type2)
+	type1_predict = knn.predict(domain1)
+	np.savetxt("type1_predict.txt", type1_predict)
+	count = 0
+	for label1, label2 in zip(type1_predict, type1):
+		if label1 == label2:
+			count += 1
+	return count / len(type1)