Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions examples/plot_letters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
===============================
OCR Letter sequence recognition
===============================
This example illustrates the use of a chain CRF for optical character
recognition. The example is taken from Taskar et al "Max-margin markov random
fields".

Each example consists of a handwritten word, that was presegmented into
characters. Each character is represented as a 16x8 binary image. The task is
to classify the image into one of the 26 characters a-z. The first letter of
every word was ommited as it was capitalized and the task does only consider
small caps letters.

We compare classification using a standard linear SVM that classifies
each letter individually with a chain CRF that can exploit correlations
between neighboring letters (the correlation is particularly strong
as the same words are used during training and testsing).

The first figures shows the segmented letters of four words from the test set.
In set are the ground truth (green), the prediction using SVM (blue) and the
prediction using a chain CRF (red).

The second figure shows the pairwise potentials learned by the chain CRF.
The strongest patterns are "y after l" and "n after i".

There are obvious extensions that both methods could benefit from, such as
window features or non-linear kernels. This example is more meant to give a
demonstration of the CRF than to show its superiority.
"""
import numpy as np
import matplotlib.pyplot as plt

from sklearn.svm import LinearSVC
#from sklearn.metrics import confusion_matrix

from pystruct.datasets import load_letters
from pystruct.models import ChainCRF
from pystruct.learners import OneSlackSSVM

abc = "abcdefghijklmnopqrstuvwxyz"

letters = load_letters()
X, y, folds = letters['data'], letters['labels'], letters['folds']
# we convert the lists to object arrays, as that makes slicing much more
# convenient
X, y = np.array(X), np.array(y)
X_train, X_test = X[folds == 1], X[folds != 1]
y_train, y_test = y[folds == 1], y[folds != 1]

# Train linear SVM
svm = LinearSVC(dual=False, C=.1)
# flatten input
svm.fit(np.vstack(X_train), np.hstack(y_train))

# Train linear chain CRF
model = ChainCRF(inference_method='qpbo')
ssvm = OneSlackSSVM(model=model, C=.1, inference_cache=50, tol=0.1)
ssvm.fit(X_train, y_train)

print("Test score with chain CRF: %f" % ssvm.score(X_test, y_test))

print("Test score with linear SVM: %f" % svm.score(np.vstack(X_test),
np.hstack(y_test)))

# plot some word sequenced
n_words = 4
rnd = np.random.RandomState(1)
selected = rnd.randint(len(y_test), size=n_words)
max_word_len = max([len(y) for y in y_test[selected]])
fig, axes = plt.subplots(n_words, max_word_len, figsize=(10, 10))
fig.subplots_adjust(wspace=0)
for ind, axes_row in zip(selected, axes):
y_pred_svm = svm.predict(X_test[ind])
y_pred_chain = ssvm.predict([X_test[ind]])[0]
for i, (a, image, y_true, y_svm, y_chain) in enumerate(
zip(axes_row, X_test[ind], y_test[ind], y_pred_svm, y_pred_chain)):
a.matshow(image.reshape(16, 8), cmap=plt.cm.Greys)
a.text(0, 3, abc[y_true], color="#00AA00", size=25)
a.text(0, 14, abc[y_svm], color="#5555FF", size=25)
a.text(5, 14, abc[y_chain], color="#FF5555", size=25)
a.set_xticks(())
a.set_yticks(())
for ii in xrange(i + 1, max_word_len):
axes_row[ii].set_visible(False)

plt.matshow(ssvm.w[26 * 8 * 16:].reshape(26, 26))
plt.title("Transition parameters of the chain CRF.")
plt.xticks(np.arange(25), abc)
plt.yticks(np.arange(25), abc)
plt.show()
3 changes: 2 additions & 1 deletion pystruct/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
generate_checker_multinomial,
generate_big_checker)
from .scene import load_scene
from .letters import load_letters

__all__ = ['generate_blocks', 'generate_blocks_multinomial', 'generate_bars',
'generate_crosses_explicit', 'binary', 'multinomial', 'load_scene',
'make_simple_2x2', 'generate_easy', 'generate_crosses',
'generate_checker', 'generate_checker_multinomial',
'generate_big_checker']
'generate_big_checker', 'load_letters']
Binary file added pystruct/datasets/letters.pickle
Binary file not shown.
22 changes: 22 additions & 0 deletions pystruct/datasets/letters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import cPickle
from os.path import dirname
from os.path import join

import numpy as np


def load_letters():
    """Load the OCR letters dataset.

    This is a chain classification task.
    Each example consists of a word, segmented into letters.
    The first letter of each word is omitted from the data,
    as it was a capital letter (in contrast to all other letters).

    Returns
    -------
    data : dict
        Dictionary with keys 'data', 'labels' and 'folds', plus an added
        'images' key with one 16 x (8 * word_length) image per word.
    """
    module_path = dirname(__file__)
    # open in binary mode (pickle data) and make sure the file is closed
    with open(join(module_path, 'letters.pickle'), 'rb') as data_file:
        data = cPickle.load(data_file)
    # we add an easy to use image representation:
    data['images'] = [np.hstack([l.reshape(16, 8) for l in word])
                      for word in data['data']]
    return data
2 changes: 1 addition & 1 deletion pystruct/learners/n_slack_ssvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def _solve_n_slack_qp(self, constraints, n_samples):
psis = [c[1] for sample in constraints for c in sample]
losses = [c[2] for sample in constraints for c in sample]

psi_matrix = np.vstack(psis)
psi_matrix = np.vstack(psis).astype(np.float)
n_constraints = len(psis)
P = cvxopt.matrix(np.dot(psi_matrix, psi_matrix.T))
# q contains loss from margin-rescaling
Expand Down
3 changes: 2 additions & 1 deletion pystruct/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .crf import CRF
from .grid_crf import GridCRF, DirectionalGridCRF
from .graph_crf import GraphCRF
from .chain_crf import ChainCRF
from .latent_grid_crf import LatentGridCRF, LatentDirectionalGridCRF
from .latent_graph_crf import LatentGraphCRF
from .latent_node_crf import LatentNodeCRF, EdgeFeatureLatentNodeCRF
Expand All @@ -12,5 +13,5 @@
__all__ = ["StructuredModel", "CRF", "GridCRF", "GraphCRF",
"DirectionalGridCRF", "BinaryClf", "LatentGridCRF",
"LatentDirectionalGridCRF", "MultiClassClf", "LatentGraphCRF",
"MultiLabelClf", "LatentNodeCRF", "EdgeFeatureGraphCRF",
"MultiLabelClf", "ChainCRF", "LatentNodeCRF", "EdgeFeatureGraphCRF",
"EdgeFeatureLatentNodeCRF"]
80 changes: 80 additions & 0 deletions pystruct/models/chain_crf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import numpy as np

from .graph_crf import GraphCRF


def make_chain_edges(x):
    """Return the chain edges [i, i + 1] linking consecutive nodes of x.

    Parameters
    ----------
    x : ndarray, shape (n_nodes, n_features)
        Node features; only the number of rows is used.

    Returns
    -------
    edges : ndarray, shape (n_nodes - 1, 2)
        Row k is [k, k + 1], connecting node k to its successor.
    """
    inds = np.arange(x.shape[0])
    return np.column_stack([inds[:-1], inds[1:]])


class ChainCRF(GraphCRF):
    """Linear-chain CRF.

    Pairwise potentials are the same for all edges.
    This leads to ``n_classes`` parameters for unary potentials.
    If ``directed=True``, there are ``n_classes * n_classes`` parameters
    for pairwise potentials, if ``directed=False``, there are only
    ``n_classes * (n_classes + 1) / 2`` (for a symmetric matrix).

    Unary evidence ``x`` is given as array of shape (n_nodes, n_features), and
    labels ``y`` are given as array of shape (n_nodes,). Chain lengths do not
    need to be constant over the dataset.

    Parameters
    ----------
    n_states : int, default=None
        Number of states for all variables.
        If None, it is inferred from the labels in ``initialize``.

    n_features : int, default=None
        Number of features per node.
        If None, it is inferred from the data in ``initialize``.

    inference_method : string or None, default=None
        Function to call to do inference and loss-augmented inference.
        Possible values are:

            - 'qpbo' for QPBO + alpha expansion.
            - 'dai' for LibDAI bindings (which has another parameter).
            - 'lp' for Linear Programming relaxation using GLPK.
            - 'ad3' for AD3 dual decomposition.

        If None, ad3 is used if installed, otherwise lp.

    class_weight : None, or array-like
        Class weights. If an array-like is passed, it must have length
        n_classes. None means equal class weights.

    directed : boolean, default=True
        Whether to model directed or undirected connections.
        In undirected models, interaction terms are symmetric,
        so an edge ``a -> b`` has the same energy as ``b -> a``.
    """
    def __init__(self, n_states=None, n_features=None, inference_method=None,
                 class_weight=None, directed=True):
        GraphCRF.__init__(self, n_states=n_states, n_features=n_features,
                          inference_method=inference_method,
                          class_weight=class_weight, directed=directed)

    def _get_edges(self, x):
        # connect every node to its successor in the chain
        return make_chain_edges(x)

    def _get_features(self, x):
        # unlike GraphCRF, x is the feature array itself, not a tuple
        return x

    def initialize(self, X, Y):
        """Infer ``n_features`` and ``n_states`` from the data if unset.

        Raises ValueError if values passed to ``__init__`` disagree with
        what is found in X and Y.
        """
        n_features = X[0].shape[1]
        if self.n_features is None:
            self.n_features = n_features
        elif self.n_features != n_features:
            raise ValueError("Expected %d features, got %d"
                             % (self.n_features, n_features))

        n_states = len(np.unique(np.hstack(Y)))
        if self.n_states is None:
            self.n_states = n_states
        elif self.n_states != n_states:
            raise ValueError("Expected %d states, got %d"
                             % (self.n_states, n_states))

        self._set_size_psi()
        self._set_class_weight()
3 changes: 3 additions & 0 deletions pystruct/models/crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .base import StructuredModel
from ..inference import inference_dispatch, get_installed
#from .utils import loss_augment_unaries


class CRF(StructuredModel):
Expand All @@ -20,6 +21,8 @@ def __init__(self, n_states=None, n_features=None, inference_method=None,
self._set_class_weight()

def initialize(self, X, Y):
# Works for both GridCRF and GraphCRF, but not ChainCRF.
# funny that ^^
n_features = X[0][0].shape[1]
if self.n_features is None:
self.n_features = n_features
Expand Down
10 changes: 7 additions & 3 deletions pystruct/models/graph_crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ class GraphCRF(CRF):
"""Pairwise CRF on a general graph.

Pairwise potentials are symmetric and the same for all edges.
This leads to n_classes parameters for unary potentials and
n_classes * (n_classes + 1) / 2 parameters for edge potentials.
This leads to n_classes parameters for unary potentials.
If ``directed=True``, there are ``n_classes * n_classes`` parameters
for pairwise potentials, if ``directed=False``, there are only
``n_classes * (n_classes + 1) / 2`` (for a symmetric matrix).

Examples, i.e. X, are given as an iterable of n_examples.
An example, x, is represented as a tuple (features, edges) where
Expand All @@ -27,7 +29,7 @@ class GraphCRF(CRF):
n_features : int, default=None
Number of features per node. None means n_states.

inference_method : string, default="ad3"
inference_method : string or None, default=None
Function to call do do inference and loss-augmented inference.
Possible values are:

Expand All @@ -36,6 +38,8 @@ class GraphCRF(CRF):
- 'lp' for Linear Programming relaxation using GLPK.
- 'ad3' for AD3 dual decomposition.

If None, ad3 is used if installed, otherwise lp.

class_weight : None, or array-like
Class weights. If an array-like is passed, it must have length
n_classes. None means equal class weights.
Expand Down
10 changes: 10 additions & 0 deletions src/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,13 @@ def crammer_singer_psi(double[:,:] X, long[:] Y, double[:, :] out):
y = Y[i]
for j in xrange(X.shape[1]):
out[y, j] += X[i, j]

# untested!
#def loss_augment_unaries(double[:,:] unary_potentials, long[:] y, double[:] class_weight):
# cdef int i
# cdef int n_states = unary_potentials.shape[1]
# for i in range(unary_potentials.shape[0]):
# for s in range(n_states):
# if s == y[i]:
# continue
# unary_potentials[i, s] += class_weight[s]
3 changes: 1 addition & 2 deletions tests/test_learners/test_edge_feature_graph_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ def test_multinomial_blocks_directional_simple():
X = zip([x.reshape(-1, 3) for x in X_], edges, edge_features)
Y = [y.ravel() for y in Y_]

crf = EdgeFeatureGraphCRF(n_states=3,
n_edge_features=2)
crf = EdgeFeatureGraphCRF(n_states=3, n_edge_features=2)
clf = NSlackSSVM(model=crf, max_iter=10, C=1, check_constraints=False)
clf.fit(X, Y)
Y_pred = clf.predict(X)
Expand Down
42 changes: 42 additions & 0 deletions tests/test_models/test_chain_crf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import numpy as np
from numpy.testing import assert_array_equal, assert_equal

from nose.tools import assert_raises

from pystruct.models import ChainCRF


def test_initialize():
    # initialize() should infer n_states and n_features from the data, and
    # raise when explicitly given values disagree with the data.
    rnd = np.random.RandomState(0)
    x = rnd.normal(size=(13, 5))
    y = rnd.randint(3, size=13)
    crf = ChainCRF(n_states=3, n_features=5)
    # no-op: given values already match the data
    crf.initialize([x], [y])

    # test initialization works
    crf = ChainCRF()
    crf.initialize([x], [y])
    assert_equal(crf.n_states, 3)
    assert_equal(crf.n_features, 5)

    # mismatching n_states must raise
    crf = ChainCRF(n_states=2)
    assert_raises(ValueError, crf.initialize, X=[x], Y=[y])


def test_directed_chain():
    # A directed model must treat the two directions of an edge differently.
    # Build a chain of length three over states 0, 1, 2 that prefer to occur
    # in exactly this order; unary evidence sits only on the middle node.
    x = np.zeros((3, 3), dtype=int)
    x[1, 1] = 1

    # weights: 3x3 identity for the unaries, then pairwise terms rewarding
    # the forward transitions 0 -> 1 and 1 -> 2 (and nothing else).
    w = np.array([1, 0, 0,
                  0, 1, 0,
                  0, 0, 1,
                  0, 1, 0,
                  0, 0, 1,
                  0, 0, 0])
    crf = ChainCRF(n_states=3, n_features=3)
    assert_array_equal([0, 1, 2], crf.inference(x, w))