MNIST handwritten digits

MDP-style classification

import cPickle
import mdp
import gzip
import numpy as np

class DigitsIterator:
    def __init__(self, digits, labels):
        self.digits = digits
        self.targets = labels
    def __iter__(self):
        frac = 10
        ll = len(self.targets)
        for i in xrange(frac):
            yield self.digits[i*ll/frac:(i+1)*ll/frac], \
                  self.targets[i*ll/frac:(i+1)*ll/frac]

data = cPickle.load(gzip.open('mnist.pickle.gz'))
for k in ['traindata', 'testdata']:
    data[k] = data[k].reshape(-1, 28 * 28)

fdaclf = (mdp.nodes.WhiteningNode(output_dim=10, dtype='d') +
          mdp.nodes.PolynomialExpansionNode(2) +
          mdp.nodes.FDANode(output_dim=9) +
          mdp.nodes.GaussianClassifierNode())

fdaclf.verbose = True

fdaclf.train([[data['traindata']],
               None,
               DigitsIterator(data['traindata'],
                              data['trainlabels']),
               DigitsIterator(data['traindata'],
                              data['trainlabels'])
               ])

feature_space = fdaclf[:-1](data['testdata'])
guess = fdaclf[-1].label(feature_space)
err = 1 - np.mean(guess == data['testlabels'])
print 'Test error:', err

Doing it the PyMVPA way

import pylab as pl
from mpl_toolkits.mplot3d import Axes3D
from mvpa2.suite import *

Following MNIST dataset is not distributed along with PyMVPA due to its size. Please download it into directory from which you are running this example first. Data visualization depends on the 3D matplotlib features, which are available only from version 0.99

data = cPickle.load(gzip.open('mnist.pickle.gz'))
train = dataset_wizard(
        data['traindata'],
        targets=data['trainlabels'],
        chunks='train')
test = dataset_wizard(
        data['testdata'],
        targets=data['testlabels'],
        chunks='test')
# merge the datasets into on
ds = vstack((train, test))
ds.init_origids('samples')

#examples = [0, 25024, 50000, 59000]
examples = [3001 + 5940 * i for i in range(10)]
#examples = [0, 9000, 18000]


pl.figure(figsize=(2, 5))

for i, id_ in enumerate(examples):
    ax = pl.subplot(2, 5, i+1)
    ax.axison = False
    pl.imshow(data['traindata'][id_].T, cmap=pl.cm.gist_yarg,
             interpolation='nearest', aspect='equal')

pl.subplots_adjust(left=0, right=1, bottom=0, top=1,
                  wspace=0.05, hspace=0.05)
pl.draw()


fdaflow = (mdp.nodes.WhiteningNode(output_dim=10, dtype='d') +
           mdp.nodes.PolynomialExpansionNode(2) +
           mdp.nodes.FDANode(output_dim=9))
fdaflow.verbose = True

mapper = MDPFlowMapper(fdaflow,
                       ([], [], [DatasetAttributeExtractor('sa', 'targets')]))

tm = TransferMeasure(MappedClassifier(SMLR(), mapper),
                     Splitter('chunks', attr_values=['train', 'test']),
                     enable_ca=['stats', 'samples_error'])
tm(ds)
print 'Test error:', 1 - tm.ca.stats.stats['ACC']

if externals.exists('matplotlib') \
   and externals.versions['matplotlib'] >= '0.99':
    pts = []
    for i, ex in enumerate(examples):
        pts.append(mapper.forward(ds.samples[ex:ex+200])[:, :3])

    fig = pl.figure()

    ax = Axes3D(fig)
    colors = ('r','g','b','k','c','m','y','burlywood','chartreuse','gray')
    clouds = []
    for i, p in enumerate(pts):
        print i
        clouds.append(ax.plot(p[:, 0], p[:, 1], p[:, 2], 'o', c=colors[i],
                              label=str(i), alpha=0.6))

    ax.legend([str(i) for i in range(10)])
    pl.draw()

See also

The full source code of this example is included in the PyMVPA source distribution (doc/examples/mdp_mnist.py).

Table Of Contents

Previous topic

Spatio-temporal Analysis of event-related fMRI data

Next topic

Analysis of eye movement patterns

NeuroDebian

NITRC-listed