# Source code for nbdtools.nbdpred

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
from sklearn.neighbors import KNeighborsClassifier


class NbdPred(object):
    """Nearest-neighbor neighborhood predictor built from labelled places.

    The predictor takes a list of places whose neighborhood is known and
    classifies new locations by the neighborhood of the closest known place.

    :param list loc_and_n: the data used to build a predictor, as a list of
        places. Each place is a list of two floats and a str; the two floats
        are the location of the place (latitude first, then longitude), and
        the str is the neighborhood the place belongs to.

    We use the following example throughout.

    >>> from nbdtools.nbdpred import NbdPred
    >>> loc_and_n = [[0, 0, 'A'], [0, 1, 'A'], [2, 0, 'B'], [2, 1, 'B']]
    >>> npred = NbdPred(loc_and_n)

    .. todo:: Neighborhood graph
    """

    def __init__(self, loc_and_n):
        self.loc_and_n = loc_and_n

        #: The set of neighborhoods.
        self.neighborhoods = {place[2] for place in loc_and_n}
        #: The list of neighborhoods, reverse-sorted. A label's position in
        #: this list is its integer code for plotting and colors.
        self.neighborhoods_list = sorted(self.neighborhoods, reverse=True)
        #: The number of neighborhoods.
        self.num_neighborhoods = len(self.neighborhoods_list)

        # Frequency dictionary: neighborhood -> number of known places.
        self.nfreq = {n: 0 for n in self.neighborhoods}
        for place in self.loc_and_n:
            self.nfreq[place[2]] += 1

        # Assign a random (slightly darkened) RGB color to each neighborhood.
        # A real list is built so the result is usable on Python 3, where
        # ``map`` returns a one-shot iterator.
        self.neighborhood_colors = {
            n: [0.8 * np.random.random() for _ in range(3)]
            for n in self.neighborhoods
        }

        # Color map whose entries follow the order of neighborhoods_list.
        self.ncmap = ListedColormap(
            [self.neighborhood_colors[n] for n in self.neighborhoods_list])

        # Latitudes and longitudes of the known places.
        self.latis = [place[0] for place in self.loc_and_n]
        self.longis = [place[1] for place in self.loc_and_n]

        #: The classifier most recently trained by :meth:`make_predictor`
        #: (``None`` until then); used by :meth:`plot_decision_regions`.
        self.NN = None

    def _split_indices(self, train_percent):
        """Randomly partition the place indices into (train, test) lists.

        Every neighborhood contributes at least one index to the training
        list; the rest of the training list is drawn without replacement
        from the remaining indices.
        """
        datasize = len(self.loc_and_n)

        # Group indices by neighborhood with enumerate: list.index() would
        # be quadratic and always return the first of any duplicate places.
        by_neighborhood = {}
        for ind, place in enumerate(self.loc_and_n):
            by_neighborhood.setdefault(place[2], []).append(ind)

        # At least one point from each neighborhood goes in the train set.
        train = [int(np.random.choice(by_neighborhood[n]))
                 for n in self.neighborhoods]
        chosen = set(train)

        # Fill in the rest of the train data. The requested size is clamped
        # because the seed points alone may already exceed the requested
        # fraction (negative size) or the fraction may exceed the pool.
        remaining = [ind for ind in range(datasize) if ind not in chosen]
        extra = int(datasize * train_percent - len(train))
        extra = max(0, min(extra, len(remaining)))
        if extra:
            picked = np.random.choice(remaining, size=extra, replace=False)
            train.extend(int(ind) for ind in picked)
            chosen.update(train)

        test = [ind for ind in range(datasize) if ind not in chosen]
        return train, test

    def make_predictor(self, train_percent):
        """Train a nearest neighbor predictor and score it on held-out data.

        Split the data set into training and test sets, return a nearest
        neighbor predictor trained on the training set and the
        classification rate on the test set. The trained predictor is also
        stored as ``self.NN`` so that :meth:`plot_decision_regions` can use
        it.

        :param float train_percent: the fraction (between 0 and 1) of the
            data set that will go into the training set. At least one place
            per neighborhood is always used for training, even if that
            exceeds the requested fraction.

        :return: a nearest neighbor predictor and its classification rate;
            the rate is NaN when no places are left for the test set
        :rtype: :class:`sklearn.neighbors.KNeighborsClassifier`, float

        >>> from nbdtools.nbdpred import NbdPred
        >>> loc_and_n = [[0, 0, 'A'], [0, 1, 'A'], [2, 0, 'B'], [2, 1, 'B']]
        >>> npred = NbdPred(loc_and_n)
        >>> nnclassifier, classrate = npred.make_predictor(train_percent=0.5)
        >>> print(classrate)
        1.0
        >>> print(nnclassifier.predict([[0, 2]]))
        ['A']
        >>> print(nnclassifier.predict([[3, 0]]))
        ['B']
        """
        train_indices, test_indices = self._split_indices(train_percent)
        train_data = [self.loc_and_n[ind] for ind in train_indices]
        test_data = [self.loc_and_n[ind] for ind in test_indices]

        # Train a nearest neighbor classifier on (lat, long) -> neighborhood.
        NN = KNeighborsClassifier(n_neighbors=1)
        NN.fit([place[:2] for place in train_data],
               [place[2] for place in train_data])
        self.NN = NN

        # Classification rate on the test set. All test points are predicted
        # in one 2-D batch, since predict() expects an array of samples.
        if test_data:
            predicted = NN.predict([place[:2] for place in test_data])
            hits = sum(1 for guess, place in zip(predicted, test_data)
                       if guess == place[2])
            class_rate = hits / float(len(test_data))
        else:
            class_rate = float("nan")

        return NN, class_rate

    def plot_decision_regions(self, points=True, step=0.0005):
        """Plot the decision regions of the most recently trained predictor.

        A grid spanning the bounding box of the known places is classified
        and drawn as a colored mesh, with a colorbar labelled by
        neighborhood name.

        :param bool points: if true, also scatter the known places on top
            of the regions, colored by their neighborhood
        :param float step: spacing of the classification grid, in the same
            units as the place coordinates

        :raises RuntimeError: if :meth:`make_predictor` has not been called
        """
        classifier = getattr(self, "NN", None)
        if classifier is None:
            raise RuntimeError("no trained predictor: call make_predictor() "
                               "before plot_decision_regions()")

        xx, yy = np.meshgrid(
            np.arange(min(self.longis), max(self.longis), step),
            np.arange(min(self.latis), max(self.latis), step))

        # Integer code of each neighborhood = its position in the list.
        code_of = {n: i for i, n in enumerate(self.neighborhoods_list)}

        # Features are (latitude, longitude), matching the training data.
        grid = np.column_stack((yy.ravel(), xx.ravel()))
        Z = np.array([code_of[label] for label in classifier.predict(grid)])
        Z = Z.reshape(xx.shape)

        plt.pcolormesh(xx, yy, Z, cmap=plt.get_cmap("Paired"))
        if points:
            c = [code_of[place[2]] for place in self.loc_and_n]
            plt.scatter(self.longis, self.latis, c=c,
                        cmap=plt.get_cmap("Paired"))
        cbar = plt.colorbar(ticks=list(range(self.num_neighborhoods)))
        cbar.ax.set_yticklabels(self.neighborhoods_list)
        plt.show()