import cPickle
import mdp
import gzip
import numpy as np
class DigitsIterator:
def __init__(self, digits, labels):
self.digits = digits
self.targets = labels
def __iter__(self):
frac = 10
ll = len(self.targets)
for i in xrange(frac):
yield self.digits[i*ll/frac:(i+1)*ll/frac], \
self.targets[i*ll/frac:(i+1)*ll/frac]
data = cPickle.load(gzip.open('mnist.pickle.gz'))
for k in ['traindata', 'testdata']:
data[k] = data[k].reshape(-1, 28 * 28)
fdaclf = (mdp.nodes.WhiteningNode(output_dim=10, dtype='d') +
mdp.nodes.PolynomialExpansionNode(2) +
mdp.nodes.FDANode(output_dim=9) +
mdp.nodes.GaussianClassifierNode())
fdaclf.verbose = True
fdaclf.train([[data['traindata']],
None,
DigitsIterator(data['traindata'],
data['trainlabels']),
DigitsIterator(data['traindata'],
data['trainlabels'])
])
feature_space = fdaclf[:-1](data['testdata'])
guess = fdaclf[-1].label(feature_space)
err = 1 - np.mean(guess == data['testlabels'])
print 'Test error:', err
import pylab as pl
from mpl_toolkits.mplot3d import Axes3D
from mvpa2.suite import *
Following MNIST dataset is not distributed along with PyMVPA due to its size. Please download it into directory from which you are running this example first. Data visualization depends on the 3D matplotlib features, which are available only from version 0.99
data = cPickle.load(gzip.open('mnist.pickle.gz'))
train = dataset_wizard(
data['traindata'],
targets=data['trainlabels'],
chunks='train')
test = dataset_wizard(
data['testdata'],
targets=data['testlabels'],
chunks='test')
# merge the datasets into on
ds = vstack((train, test))
ds.init_origids('samples')
#examples = [0, 25024, 50000, 59000]
examples = [3001 + 5940 * i for i in range(10)]
#examples = [0, 9000, 18000]
pl.figure(figsize=(2, 5))
for i, id_ in enumerate(examples):
ax = pl.subplot(2, 5, i+1)
ax.axison = False
pl.imshow(data['traindata'][id_].T, cmap=pl.cm.gist_yarg,
interpolation='nearest', aspect='equal')
pl.subplots_adjust(left=0, right=1, bottom=0, top=1,
wspace=0.05, hspace=0.05)
pl.draw()
fdaflow = (mdp.nodes.WhiteningNode(output_dim=10, dtype='d') +
mdp.nodes.PolynomialExpansionNode(2) +
mdp.nodes.FDANode(output_dim=9))
fdaflow.verbose = True
mapper = MDPFlowMapper(fdaflow,
([], [], [DatasetAttributeExtractor('sa', 'targets')]))
tm = TransferMeasure(MappedClassifier(SMLR(), mapper),
Splitter('chunks', attr_values=['train', 'test']),
enable_ca=['stats', 'samples_error'])
tm(ds)
print 'Test error:', 1 - tm.ca.stats.stats['ACC']
if externals.exists('matplotlib') \
and externals.versions['matplotlib'] >= '0.99':
pts = []
for i, ex in enumerate(examples):
pts.append(mapper.forward(ds.samples[ex:ex+200])[:, :3])
fig = pl.figure()
ax = Axes3D(fig)
colors = ('r','g','b','k','c','m','y','burlywood','chartreuse','gray')
clouds = []
for i, p in enumerate(pts):
print i
clouds.append(ax.plot(p[:, 0], p[:, 1], p[:, 2], 'o', c=colors[i],
label=str(i), alpha=0.6))
ax.legend([str(i) for i in range(10)])
pl.draw()
See also
The full source code of this example is included in the PyMVPA source distribution (doc/examples/mdp_mnist.py).