--- /dev/null
+++ b/utils.py
@@ -0,0 +1,82 @@
+import random
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def plot_predictions(X, y, model=None, title=""):
+    """Scatter-plot 2-D points colored by their one-hot labels y; if a
+    fitted model is given, shade the background with its decision regions."""
+    h = .025  # mesh step size
+    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
+    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
+    plt.figure()
+    if model is not None:
+        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
+                             np.arange(y_min, y_max, h))
+
+        # Obtain a predicted label for each point in the mesh.
+        Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
+
+        # Put the result into a color plot.
+        Z = Z.reshape(xx.shape)
+        plt.imshow(Z, interpolation='nearest',
+                   extent=(xx.min(), xx.max(), yy.min(), yy.max()),
+                   cmap=plt.cm.brg,
+                   aspect='auto', origin='lower')
+
+    # Plot the training points, one color per class.
+    colors = ['blue', 'red', 'green', 'orange']
+    if y.shape[1] == 2:
+        colors = ['blue', 'green']
+    y_c = compress(y)
+    for i, color in zip(range(y.shape[1]), colors):
+        idx = np.where(y_c == i)[0]
+        plt.scatter(X[idx, 0], X[idx, 1], c=color)
+    plt.title(title)
+    plt.axis('tight')
+    plt.show()
+
+
+def sparsify_data(X, sparsity=.5, noise=.01):
+    """Embed X in a wider space of int(d / sparsity) columns whose extra
+    columns are uniform noise in [-noise/2, noise/2)."""
+    d = X.shape[1]
+    d_sparse = int(d / sparsity)
+    new_X = np.random.rand(X.shape[0], d_sparse) * noise - noise / 2.
+    new_X[:, :d] = X
+    return new_X
+
+
+def compress(array):
+    """Collapse one-hot (or score) rows to integer class labels, taking
+    the first maximum in each row."""
+    return np.asarray(array).argmax(axis=1)
+
+
+def prediction_accuracy(X, y, model):
+    """Fraction of rows where the model's predicted class index hits a 1
+    in the one-hot label matrix y."""
+    y_hat = model.predict(X)
+    correct = sum(int(y[i, int(y_hat[i])] == 1) for i in range(len(y_hat)))
+    return correct / len(y_hat)
+
+
+def generate_data(means=((0, 0), (5, 0), (2.5, 2.5)), stdev=1., classes=3, n=50):
+    """Draw n points from an isotropic Gaussian around each mean; return
+    (X, y) with y one-hot over `classes` columns."""
+    K = len(means)
+    X = np.empty((n * K, max(len(m) for m in means)))
+    y = np.zeros((n * K, classes))
+    for k in range(K):
+        for i in range(len(means[k])):
+            X[k * n:(k + 1) * n, i] = [random.gauss(means[k][i], stdev)
+                                       for _ in range(n)]
+        y[k * n:(k + 1) * n, k % classes] = 1
+    return X, y
+
+
+def make_synthetic_data(means=((0, 0), (2.5, -2.5), (5, 0), (2.5, 2.5)),
+                        stdev=1., classes=2, n=50):
+    # TODO: not yet implemented.
+    pass
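
A quick usage sketch for review purposes, not part of the patch. It assumes utils.py is importable as `utils`; the LogisticRegression choice is illustrative, and compress() converts the one-hot labels from generate_data() into the integer class labels scikit-learn expects:

    from sklearn.linear_model import LogisticRegression

    from utils import compress, generate_data, plot_predictions, prediction_accuracy

    # Three Gaussian blobs, labels returned one-hot.
    X, y = generate_data()

    # Train on integer labels; prediction_accuracy then checks each
    # predicted index against the one-hot matrix.
    model = LogisticRegression().fit(X, compress(y))

    print(prediction_accuracy(X, y, model))
    plot_predictions(X, y, model=model, title="Logistic regression decision regions")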