Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@ addons:
- libhdf5-serial-dev
- libboost1.55-dev
- libboost-python1.55-dev
- gfortran
virtualenv:
system_site_packages: true
env:
matrix:
- DISTRIB="ubuntu" PYTHON_VERSION="2.7" OPENGM="true"
SCIPY_VERSION="0.11.0"
## ubuntu without opengm
- DISTRIB="ubuntu" PYTHON_VERSION="2.7" OPENGM="false"
SCIPY_VERSION="0.11.0"
## This environment tests the oldest supported anaconda env
- DISTRIB="conda" PYTHON_VERSION="2.7"
NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0"
Expand Down
1 change: 1 addition & 0 deletions continuous_integration/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then
# Use standard ubuntu packages in their default version
# except for cython :-/
$PIP install --user cvxopt
$PIP install --user scipy==$SCIPY_VERSION
fi

if [[ "$COVERAGE" == "true" ]]; then
Expand Down
23 changes: 2 additions & 21 deletions examples/multi_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,12 @@

from sklearn.metrics import hamming_loss
from sklearn.datasets import fetch_mldata
from sklearn.metrics import mutual_info_score
from scipy.sparse.csgraph import minimum_spanning_tree

from pystruct.learners import OneSlackSSVM
from pystruct.models import MultiLabelClf
from pystruct.datasets import load_scene


def chow_liu_tree(y_):
# compute mutual information using sklearn
n_labels = y_.shape[1]
mi = np.zeros((n_labels, n_labels))
for i in range(n_labels):
for j in range(n_labels):
mi[i, j] = mutual_info_score(y_[:, i], y_[:, j])
mst = minimum_spanning_tree(sparse.csr_matrix(-mi))
edges = np.vstack(mst.nonzero()).T
edges.sort(axis=1)
return edges


dataset = "scene"
# dataset = "yeast"

Expand All @@ -64,13 +49,9 @@ def chow_liu_tree(y_):
X_train, X_test = scene['X_train'], scene['X_test']
y_train, y_test = scene['y_train'], scene['y_test']

n_labels = y_train.shape[1]
full = np.vstack([x for x in itertools.combinations(range(n_labels), 2)])
tree = chow_liu_tree(y_train)

full_model = MultiLabelClf(edges=full, inference_method='qpbo')
full_model = MultiLabelClf(edges="full", inference_method='qpbo')
independent_model = MultiLabelClf(inference_method='unary')
tree_model = MultiLabelClf(edges=tree, inference_method="max-product")
tree_model = MultiLabelClf(edges="tree", inference_method="max-product")

full_ssvm = OneSlackSSVM(full_model, inference_cache=50, C=.1, tol=0.01)

Expand Down
23 changes: 19 additions & 4 deletions pystruct/models/multilabel_svm.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import itertools
import numpy as np
from .crf import CRF
from ..utils import chow_liu_tree


class MultiLabelClf(CRF):
Expand Down Expand Up @@ -42,12 +44,24 @@ def __init__(self, n_labels=None, n_features=None, edges=None,

def _set_size_joint_feature(self):
# try to set the size of joint_feature if possible
if self.n_features is not None and self.n_states is not None:
if self.edges is None:
self.edges = np.zeros(shape=(0, 2), dtype=np.int)
if isinstance(self.edges, np.ndarray) and self.n_features is not None \
and self.n_states is not None:
self.size_joint_feature = (self.n_features * self.n_labels + 4 *
self.edges.shape[0])

def _set_edge_structure_from_labels(self, Y):
# build an edge structure from the ground truth lables
if isinstance(self.edges, str):
if self.edges == 'tree':
self.edges = chow_liu_tree(Y)
elif self.edges == 'full':
self.edges = np.vstack([x for x in
itertools.combinations(range(self.n_labels), 2)])
else:
raise ValueError("Unknown edge structure: %s" % self.edges)
if self.edges is None:
self.edges = np.zeros(shape=(0, 2), dtype=np.int)

def initialize(self, X, Y):
n_features = X.shape[1]
if self.n_features is None:
Expand All @@ -62,7 +76,8 @@ def initialize(self, X, Y):
elif self.n_labels != n_labels:
raise ValueError("Expected %d labels, got %d"
% (self.n_labels, n_labels))


self._set_edge_structure_from_labels(Y)
self._set_size_joint_feature()
self._set_class_weight()

Expand Down
55 changes: 55 additions & 0 deletions pystruct/tests/test_models/test_multilabel_problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,58 @@ def test_multilabel_fully():
y_continuous[np.arange(n_labels), y] = 1
assert_array_almost_equal(
joint_feature, model.joint_feature(x, (y_continuous, pairwise_marginals)))


def test_multilabel_fully_text_option():
# test inference and energy with fully connected model
# edge structure specified using text argument edges="full"
n_features = 5
n_labels = 4
edges = np.vstack([x for x in itertools.combinations(range(n_labels), 2)])
model = MultiLabelClf(n_labels=n_labels, n_features=n_features,
edges="full")
rnd = np.random.RandomState(0)

x = rnd.normal(size=n_features)
w = rnd.normal(size=n_features * n_labels + 4 * len(edges))
y_dummy = np.zeros((1, n_labels))
model.initialize(x[np.newaxis], y_dummy)
y = model.inference(x, w)

# test joint_feature / energy
joint_feature = model.joint_feature(x, y)
energy = compute_energy(model._get_unary_potentials(x, w),
model._get_pairwise_potentials(x, w), edges, y)
assert_almost_equal(energy, np.dot(joint_feature, w))

# for continuous y
#y_cont = model.inference(x, w, relaxed=True)
y_continuous = np.zeros((n_labels, 2))
pairwise_marginals = []
for edge in edges:
# indicator of one of four possible states of the edge
pw = np.zeros((2, 2))
pw[y[edge[0]], y[edge[1]]] = 1
pairwise_marginals.append(pw)

pairwise_marginals = np.vstack(pairwise_marginals)

y_continuous[np.arange(n_labels), y] = 1
assert_array_almost_equal(
joint_feature, model.joint_feature(x, (y_continuous, pairwise_marginals)))

def test_multilabel_tree_text_option():
# test edges="tree" results in the correct edge structure
n_features = 5
n_labels = 3
n_examples = 4
model = MultiLabelClf(n_labels=n_labels, n_features=n_features,
edges="tree")

rnd = np.random.RandomState(0)
X = rnd.normal(size=(n_examples, n_features))
Y = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 0], [1, 1, 1]])
edges_expected = np.array([[0, 1], [1, 2]])

model.initialize(X, Y)
assert_array_equal(model.edges, edges_expected)
21 changes: 21 additions & 0 deletions pystruct/tests/test_utils/test_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import numpy as np
from pystruct.utils.graph import chow_liu_tree
from numpy.testing import assert_array_equal

def test_chow_liu_tree_small():
# choose y so largest MI goes to (0, 1) and (1, 2) edges
y = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 0], [1, 1, 1]])
edges_expected = np.array([[0, 1], [1, 2]])

edges = chow_liu_tree(y)

assert_array_equal(edges, edges_expected)

def test_chow_liu_tree_zero_mi():
# choose y so that MI is nonzero between (0, 1) and 0 between (1, 2)
y = np.array([[1, 0, 1], [0, 1, 1]])
edges_expected = np.array([[0, 1]])

edges = chow_liu_tree(y)

assert_array_equal(edges, edges_expected)
4 changes: 2 additions & 2 deletions pystruct/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
exhaustive_inference, compress_sym, expand_sym)
from .logging import SaveLogger
from .plotting import plot_grid
from .graph import make_grid_edges, edge_list_to_features
from .graph import make_grid_edges, edge_list_to_features, chow_liu_tree

__all__ = ["unwrap_pairwise",
"make_grid_edges", "find_constraint",
"find_constraint_latent", "inference", "loss_augmented_inference",
"objective_primal", "exhaustive_loss_augmented_inference",
"exhaustive_inference", "SaveLogger", "plot_grid", "compress_sym",
"expand_sym", "edge_list_to_features"]
"expand_sym", "edge_list_to_features", "chow_liu_tree"]
16 changes: 16 additions & 0 deletions pystruct/utils/graph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import numpy as np
from scipy import sparse
from sklearn.metrics import mutual_info_score
from scipy.sparse.csgraph import minimum_spanning_tree


def make_grid_edges(x, neighborhood=4, return_lists=False):
Expand All @@ -25,3 +28,16 @@ def edge_list_to_features(edge_list):
edge_features[:len(edge_list[0]), 0] = 1
edge_features[len(edge_list[0]):, 1] = 1
return edge_features


def chow_liu_tree(y_):
# compute mutual information using sklearn
n_labels = y_.shape[1]
mi = np.zeros((n_labels, n_labels))
for i in range(n_labels):
for j in range(n_labels):
mi[i, j] = mutual_info_score(y_[:, i], y_[:, j])
mst = minimum_spanning_tree(sparse.csr_matrix(-mi))
edges = np.vstack(mst.nonzero()).T
edges.sort(axis=1)
return edges